{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import numpy as np, pandas as pd\n",
    "from keras.preprocessing.text import Tokenizer\n",
    "from keras.preprocessing.sequence import pad_sequences\n",
    "from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation\n",
    "from keras.layers import Bidirectional, GlobalMaxPool1D\n",
    "from keras.models import Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.语料"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset root folder and the sub-folder holding this competition's files.\n",
    "path = r'datasets/'\n",
    "comp = r'jigsaw-toxic-comment-classification-challenge/'\n",
    "# Pre-trained GloVe vectors; the '50d' in the file name is presumably the vector size.\n",
    "EMBEDDING_FILE = r'models/glove.6B/glove.6B.50d.txt'\n",
    "# NOTE: despite the *_DIR suffix, these two are CSV file paths, not directories.\n",
    "TRAIN_DATA_DIR = path + comp + 'train.csv'\n",
    "TEST_DATA_DIR = path + comp + 'test.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Text-pipeline settings (meanings inferred from the names - confirm at the use sites).\n",
    "embed_size = 50       # presumably the word-vector dimension\n",
    "max_features = 20000  # presumably the vocabulary size cap\n",
    "maxlen = 100          # use the first 100 words of each comment (per the original note;\n",
    "                      # which end is truncated depends on the later padding call - confirm)\n",
    "\n",
    "# Read the competition's train and test CSV files into DataFrames.\n",
    "train, test = pd.read_csv(TRAIN_DATA_DIR), pd.read_csv(TEST_DATA_DIR)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "查看语料数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>comment_text</th>\n",
       "      <th>toxic</th>\n",
       "      <th>severe_toxic</th>\n",
       "      <th>obscene</th>\n",
       "      <th>threat</th>\n",
       "      <th>insult</th>\n",
       "      <th>identity_hate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0000997932d777bf</td>\n",
       "      <td>Explanation\\nWhy the edits made under my usern...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>000103f0d9cfb60f</td>\n",
       "      <td>D'aww! He matches this background colour I'm s...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>000113f07ec002fd</td>\n",
       "      <td>Hey man, I'm really not trying to edit war. It...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0001b41b1c6bb37e</td>\n",
       "      <td>\"\\nMore\\nI can't make any real suggestions on ...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0001d958c54c6e35</td>\n",
       "      <td>You, sir, are my hero. Any chance you remember...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 id                                       comment_text  toxic  \\\n",
       "0  0000997932d777bf  Explanation\\nWhy the edits made under my usern...      0   \n",
       "1  000103f0d9cfb60f  D'aww! He matches this background colour I'm s...      0   \n",
       "2  000113f07ec002fd  Hey man, I'm really not trying to edit war. It...      0   \n",
       "3  0001b41b1c6bb37e  \"\\nMore\\nI can't make any real suggestions on ...      0   \n",
       "4  0001d958c54c6e35  You, sir, are my hero. Any chance you remember...      0   \n",
       "\n",
       "   severe_toxic  obscene  threat  insult  identity_hate  \n",
       "0             0        0       0       0              0  \n",
       "1             0        0       0       0              0  \n",
       "2             0        0       0       0              0  \n",
       "3             0        0       0       0              0  \n",
       "4             0        0       0       0              0  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 159571 entries, 0 to 159570\n",
      "Data columns (total 8 columns):\n",
      "id               159571 non-null object\n",
      "comment_text     159571 non-null object\n",
      "toxic            159571 non-null int64\n",
      "severe_toxic     159571 non-null int64\n",
      "obscene          159571 non-null int64\n",
      "threat           159571 non-null int64\n",
      "insult           159571 non-null int64\n",
      "identity_hate    159571 non-null int64\n",
      "dtypes: int64(6), object(2)\n",
      "memory usage: 9.7+ MB\n"
     ]
    }
   ],
   "source": [
    "train.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "语料按类别的分布\n",
    "- 评论中，恶意评论占比非常少"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEGCAYAAACtqQjWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAfOUlEQVR4nO3de7RVdb338feHi2KSyGUXJCkYool5a/sogQqoCIaX7KmQND0+Ssc0D1anA9Z5ZDxmXjplWukQ86iY5sgsFZAKQbyQoJsUDI2hFXZ2lm3QJFPwwvf5Y/52LRZr7b3Za6612PB5jbEGc/7mb835+7H2mp8174oIzMzM8tCt3g0wM7Pth0PFzMxy41AxM7PcOFTMzCw3DhUzM8uNQ8XMzHLjULHtnqQ+krq3U6e3pE59HyQNlXREGp4p6Ytp+MDW8q2Y10BJhxeMv1fSIWm4h6QeRfW7SdqpYLynpBFFdc6UdHep/kkaI2m4pFslTZB0vKS+kk6R9NWtabsZOFRsx7ACeEbSbwpef5f0p9Zx4DfAHpKuKywv8XpJ0mVF898b+KmkIcAGYKOkXYDbgWGFFSWNl7RW0lPp9Zqk9xdU+RBwbsH4PsAcSQOAk4FHJS2R9LKk5cAS4LqC+h8EFkgalJbXE5gBHJLmVWwQ8EXgTaBHmld34HDg7fb/a80216P9KmZdW0QMKRxPK/FfA0dExAtF094CZkTELaXmJWkasHvBeC9gEXAh8HJB1V7AjcDtknaOiI2p/C3gxxHxr+n9i4G3JJ0FDAd+QcHKPCIelfRtYHhE3C1pXkRskDQXmAa8SMH3OCJWSvoucDDwJ+DLZKF6GfAjSf87Ip4raOccoAn4Uho/D/grcCTwc0mTyELm6Yj4Xan/E7NCDhXbEV0B3FMcKEkASJoHDG4dBwQ8TbYCJtXpDiwH/kYWBNcCA4HfAZNTtcnA68CxBfN5T8Fusd1S2QayrYUtGxTxX2l5o8lC4qSCyacBRwFnFtT/eqp/CvA54JCI+EvanfWQpG8BN0XEK8DC1Ib9gI8Afwc+TLbF9ChwNvA+sgBzqFi7HCq2Q5F0LjAFOFfSZ4ANEfGj4noR8dEy759WUOcdYEQqPxS4A7gXWAXsCyyJiB8WzeJPZFsXp6fxXwIbKUHSJ4CvkK3U35+2Wq6TNLyg2lTgs6n+x4GLyYLw68BtZFsg5wOXRMQcSc3A94EFwCvAJOAHafhmYBywDlgVEdMlnQ00RMTSUm00K+ZjKrbDkHQa2S/9u1NRM3CTpDNL1H1fwXGPpyQ1FddJ9fpL+grZCvkU4M9kv/wvB06UtFjS/q31I+LZiLgAOBT4RkRcEBEvl5p3RNwVEQeT7VZr3Yo5l2z3FGQ/Cq+PiKfS+D3AYWRbS/cBI8m2pA5ObZ0J7BkRH46IFemg/6XATGANsIzs2NLHybamepMdc2ku1T6zUrylYts9SbuS/XIfD4wBLgKIiEWSTgXukbQpIm4reNtOwGsRMTrNo9yK9RygJzAyIl6X9AKwd0T8EZiSdkFtKGjLU2S7yg4AfpJW3N8B1rbVh4gISTeRbRm9DexPtoWxUdIXgPMj4pG0DCLibWClpEagv6R7gH5kx39anUp2QP4YYADZMZ3lEXG8pOlkW3SHkQWUWYc4VGy7ls7CWka2S+qIiHhV0j+mR8QCSVOAayS1tfLUFgXSQLJjDi8DY9J898sm6chUrRvQW9JnI+KJiDg4nZH1m4j4sKQLyL6H3dOrXD+GAudGxKY0PheYFhHPl6m/M1lQAAwFvhkRPy2q9hOyA/WPkIXHeLLjSAA3AQ+n4VXl2mVWzKFi27WIeEPS+Ih4sY0690pakLY0WovfJAuD1l1Lfyrxvj+THTsBQNIHgAfJDmhfHhE/L7PIT/HPFXarP/LPkwJKuYdst1TJECnhcuC3ZIH6RGugpJMLukfEmxHxdjr1uIlsl+AHgSvS
lt164A2yLZdNHVymmY+p2PavRKCIoi2PiHi9YBoR8WJEHFzwakzTS11A2Dsdl1lIdrbVFOCbkr5Z4kLEscDXgK9JOhY4nmw32+KIuAPoDbxTUH8g2TGS84u2SrboQ4FeZNektF6/skvBtPFAYdi9CNwP9CU77Xgk2dlfi4AHgNGSZqjookuzchwqtiPqQ3Ywvdy0kiR9iexMq8cLyr4DvAScCJwYEXNTiI0iO6troaSVknZOAfF9YHJE/JbsIHgT8KM0rzPIzth6oHX+aWvo0Ih4tKg5O6dXcRv3ITuQf1ZkT+D7PXCApCfTyQaz0zJat6xWkwXbRyPie2SnW/8AuCYivgxMIDuF+ZBy/y9mheQnP5p1jKRuxbuCJPUH3o6IV8u8R2Sn5P4ljfeMiLfK1N2J7Jh8yelb0c6yyyi1zIh4s6isRzrQb7bVHCpmZpYb7/4yM7Pc7DAH3wYMGBBDhgypdzPMzLqU5cuXr42Iho7W32FCZciQITQ1lbwo2szMykgX9HaYd3+ZmVluHCpmZpYbh4qZmeXGoWJmZrlxqJiZWW4cKmZmlpuqhoqknpLmFIx/WdJSSfMl7SRpgKRHJD0t6YpUp9NlZmZWX1ULlfQci+XAcWl8b2BERBwBzCd7bsM0YB5wEDAxPSa1kjIzM6ujqoVKRLwREQfyz0eRHgP0lfQwcCTZ3VPHAQvSTfoeAsZWWLYZSVMlNUlqamlpqVZXzcwsqeUV9Q1AS0ScJOkxYDTQH2i9u+t6ssedVlK2mYiYBcwCaGxs7PydM2eWvRt65WaWvLltbW3v/YPq9XEb6d+Q6fOqNu81V3y0avPuMP+NVjDf2vavlgfq15M9uwGyJ+PtQfZc7tb/yT5pvJIyMzOro1puqSwHLkrDw8iCZSEwXtKTwNHANcD7KyizThiy4Y6qzXtN1eZstn2p1vdwTVXmWl7NtlQi4jFgnaQngNUR8ThwLXACsBKYlx6XWkmZmZnVUdW3VCJiWMHweUXT1pIdtM+lzMzM6ssXP5qZWW4cKmZmlhuHipmZ5cahYmZmuXGomJlZbnaYZ9SbWdfla6m6DoeK7RC2lwvLzLZ13v1lZma5caiYmVluHCpmZpYbh4qZmeXGoWJmZrlxqJiZWW4cKmZmlhuHipmZ5cYXP3aAr+Y1M+sYb6mYmVluHCpmZpabqoaKpJ6S5hSVfUHSA2l4gKRHJD0t6YpKy8zMrL6qFiqSdgGWA8cVlO0FnFlQbRowDzgImChpeIVlZmZWR1ULlYh4IyIOBJoLiq8BZhSMjwMWRMQm4CFgbIVlm5E0VVKTpKaWlpbc+2hmZpur2TEVSVOAFcAzBcX9gVfT8HqgX4Vlm4mIWRHRGBGNDQ0N+XXGzMxKquUpxZOAPYHjgX0lXQCsBfqk6X2AFyosMzOzOqrZlkpETImI0cBkYHlEfBdYCIyX1A04GniwwjIzM6ujep9SfC1wArASmBcRz1dYZmZmdVT13V8RMaxofA1wbBpeCxxZNL3TZWZmVl/13lIxM7PtiEPFzMxy41AxM7PcOFTMzCw3DhUzM8uNQ8XMzHLjUDEzs9w4VMzMLDcOFTMzy41DxczMcuNQMTOz3DhUzMwsNw4VMzPLjUPFzMxy41AxM7PcOFTMzCw3DhUzM8uNQ8XMzHJT1VCR1FPSnILxWyUtlXSfpB6SekmaK2mFpNuU6XRZNftiZmbtq1qoSNoFWA4cl8ZHAz0i4ghgN2A8cDrQHBEHAX1T3UrKzMysjqoWKhHxRkQcCDSnopeAa4qWOw5YkIYXAWMrLNuMpKmSmiQ1tbS05NEtMzNrQ82OqUTEcxHxuKSPAZuAXwD9gVdTlfVAvwrLipc5KyIaI6KxoaEh/06ZmdlmetRyYZJOAi4EToyItyWtBfqkyX2AtUDvCsrMzKyOaralImkg8O/ApIj4WypeSHZsBbLdWQ9WWGZmZnVUy1OKzwQGAT+X9Kiks4HbgT0krQReJguKSsrMzKyOqr77KyKG
pX+vBK4sUWVS0fjGCsrMzKyOfPGjmZnlxqFiZma5caiYmVluHCpmZpYbh4qZmeXGoWJmZrlxqJiZWW4cKmZmlhuHipmZ5cahYmZmuXGomJlZbhwqZmaWG4eKmZnlxqFiZma5caiYmVluHCpmZpYbh4qZmeXGoWJmZrmpaqhI6ilpThruJWmupBWSblMm17Jq9sXMzNpXtVCRtAuwHDguFZ0ONEfEQUDfVJ53mZmZ1VHVQiUi3oiIA4HmVDQOWJCGFwFjq1C2GUlTJTVJamppacmpZ2ZmVk4tj6n0B15Nw+uBflUo20xEzIqIxohobGhoyLUzZma2pR41XNZaoE8a7pPGe+dcZmZmdVTLLZWFwPg0PA54sAplZmZWR7UMlduBPSStBF4mC4W8y8zMrI6qvvsrIoalfzcCk4om511mZmZ15IsfzcwsNw4VMzPLjUPFzMxy026oSHqvpMYy0w7Lv0lmZtZVdeRA/fuB2ZKWAn8EngTmk53GezEwqnrNMzOzrqTNUJE0CNgE/BC4nixgTknD/wMcW+0GmplZ19HelsoVwIeAV8iuWD8A2An4FHA+cATws2o20MzMuo72QuXfyO6rNQMYAayJiG8ASPotMEfS4ojYUN1mmplZV9BeqJwLvE52B+A3gBsl3QfcBXwOOMuBYmZmrdo7+2t34D3AYGA1IGBXshs49gRWVLV1ZmbWpbQXKvOBpcAgsnA5EdgXOBj4EfClqrbOzMy6lPZCZTTwDrAMWAX8CvgD8FREXAUcKal7dZtoZmZdRZvHVCLiCgBJzwNrgAHAbRFxfapyGdkuMTMzsw7fpuUl4DMR8ZfWQJG0a0Qsi4i3q9c8MzPrSjoaKucCEyS9q6DsynQmmJmZGdCxe38dBUwGpgK3SDoGICIuIHtOvJmZGdDGMRVJPYDvA92BYyPiNUlfSmWtT1n8e/WbaGZmXUVbB+rfRXbB4yHAMElHAh8G9pR0N1mgDK1+E83MrKsoGyoRsR44T9I+ZDeUnAd8FYiCai93dEGSdgXuIDuDbAlwFfBTsgss50XEdEkDOlvW0XaYmVn1tHtMJSKeI7tNy6RsNP4I/B/gQxHxxlYs69PA0ogYRXYfsRvIguogYKKk4cC0CsrMzKzO2gwVSYdIWgS8BdwEnCPp/wK7AHdKulxSR69T+SvQO10suQvwEWBBRGwCHiILrnEVlJVq/1RJTZKaWlpaOthMMzPrrPa2VIYBn0s3jfwYsIDsIP1c4CTgdxERbby/0E+BCcBvgWfJ7n78apq2HuhHdjZZZ8u2EBGzIqIxIhobGho62EwzM+usNkMlIu6KiN+k0dlkt2vZLSKWRMTDEXHjVixrBnB9RAwhC4HhZDemJP27Nr06W2ZmZnVWNlQkdZd0t6Q906OEd0mvzt5E8t1A623yNwKPAeMldQOOBh4k2wrqbJmZmdVZ2VCJiHeAnSPiD8DxZI8Vfg9ZIHTG98jOJnuMLJw+BpwArCQ7g+t54NoKyszMrM7ae0hXY8GtWAScDByaygTsGhHjOrKgiFgDjCoqPrKoztrOlpmZWf21FyrLI+IkAEl7kt1Y8p7WMjMzs0JtHVPpBnSX1FvSz4BvAO8le+KjmZnZFtq6on6TpM+le36dFhGvAEi6SdIEYGVEvFizlpqZ2TavvRtKTpb0TkRcKelSoPXZKXsBV0uamI6VmJmZtXlM5QPArsD/S+PHA//KP5/06GepmJnZZtra/bUa+ApAuhXLdyPiVwVVlle5bWZm1sW0d/YXkN1FkuyKejMzs7I6+jhhMzOzdjlUzMwsNw4VMzPLjUPFzMxy41AxM7PcOFTMzCw3DhUzM8uNQ8XMzHLjUDEzs9w4VMzMLDcOFTMzy01NQ0XSlyUtlTRf0nskPSLpaUlXpOkDOltmZmb1V7NQkbQ3MCIijgDmA98G5gEHARMlDQemVVBmZmZ1VsstlWOAvpIeBo4EhgILImIT8BAwFhhXQdkWJE2V1CSpqaWlpbq9MzOz
moZKA9ASEUcBg4H/Bbyapq0H+gH9KyjbQkTMiojGiGhsaGjItzdmZraFWobKemB1Gv4dsAbok8b7AGvTq7NlZmZWZ7UMleVAYxoeRhYw4yV1A44GHgQWVlBmZmZ1VrNQiYjHgHWSniALlM8AJwArgXkR8TxwbQVlZmZWZx16nHBeIuK8oqIji6av7WyZmZnVny9+NDOz3DhUzMwsNw4VMzPLjUPFzMxy41AxM7PcOFTMzCw3DhUzM8uNQ8XMzHLjUDEzs9w4VMzMLDcOFTMzy41DxczMcuNQMTOz3DhUzMwsNw4VMzPLjUPFzMxy41AxM7Pc1DxUJH1B0gOSBkh6RNLTkq5I0zpdZmZm9VfTUJG0F3BmGp0GzAMOAiZKGl5hmZmZ1Vmtt1SuAWak4XHAgojYBDwEjK2wzMzM6qxmoSJpCrACeCYV9QdeTcPrgX4VlpVa5lRJTZKaWlpa8uuMmZmVVMstlUnAMcCdwIeBAUCfNK0PsDa9Olu2hYiYFRGNEdHY0NCQa2fMzGxLNQuViJgSEaOBycBy4HvAeEndgKOBB4GFFZSZmVmd1fOU4muBE4CVwLyIeL7CMjMzq7MetV5gRKwBjk2jRxZNW9vZMjMzqz9f/GhmZrlxqJiZWW4cKmZmlhuHipmZ5cahYmZmuXGomJlZbhwqZmaWG4eKmZnlxqFiZma5caiYmVluHCpmZpYbh4qZmeXGoWJmZrmp+V2KtyVvvfUWzc3NbNiwoc16N540qGptePbZZyueR69evRg8eDA9e/bMoUVmZp23Q4dKc3Mz7373uxkyZAiSytZ7q/mvVWvDBwfvXtH7I4J169bR3NzM0KFDc2qVmVnn7NC7vzZs2ED//v3bDJRtnST69+/f7taWmVkt7NChAnTpQGm1PfTBzLYPO/Tur2JDps/LdX73XTAq1/mZmW3rarqlIulWSUsl3Sept6S5klZIuk2ZXp0tq2U/8vKfF32Oiy++GICZM2cyc+bM+jbIzKxCNQsVSaOBHhFxBLAbcDbQHBEHAX2B44DTKyjrkm688UYfDzGz7UYtt1ReAq4pWO5MYEEaXwSMBcZVUNYlHXDAAdx+++0AbNy4kdNOO41Ro0YxZcoU3nzzzTq3zsxs69QsVCLiuYh4XNLHgE3Ak8CrafJ6oB/Qv4KyLUiaKqlJUlNLS0vOPcrH+eefzw033ABkWy37778/S5YsYZ999uHmm2+uc+vMzLZOrY+pnARcCJwI/Bnokyb1AdamV2fLthARsyKiMSIaGxoa8u1MTgYOHMh+++3H4sWL+eQnP8nIkSMBGDlyJM8880ydW2dmtnVqeUxlIPDvwKSI+BuwEBifJo8DHqywrMu66KKLeOihh7jrrrtYunQpAEuXLmXEiBF1bpmZ2dap5SnFZwKDgJ+nk7VuA/aQtBJYQRYUOwGndrKsYmuu+GjJ8pVVvKIe4JBDDuHoo49m5MiRrFq1ilGjRrHXXnv948wwM7OuomahEhFXAlcWFd9QNL4RmNTJsi7n0quv48B0m5bFixfXtzFmZjnY4a+oNzOz/DhUzMwsNw4VMzPLjUPFzMxy4xtKFprZp2TxgZ2c3cpzXuh8W8zMuiBvqdTRpTMuYv78+QBce+21XHXVVXVukZlZZRwqdfQv503jmmuu4Z133mH27Nmcd9559W6SmVlFHCp1NHjPvWhoaOCSSy5h0qRJvP7660yYMIHDDz+cyy+/HIDVq1czatQoDjvsMC677LI6t9jMrG0OlTqbPn06V199NRdeeCGXX345kydPZtmyZdx7772sW7eOuXPncuqpp/LEE0+w55571ru5ZmZtcqjU2YgRIxg0aBD9+vVj9erVXH/99YwZM4bXXnuNF198kTPOOINVq1YxadIkXnvttXo318ysTT77axuy7777cvLJJzN27FhuueUW+vbty6JFi5g+fTrDhg1j77335pxzzqFnz571bqqZWUkOlUIzXy1ZXO0bSraaPn06Z599NjNmzGDY
sGGcfvrpDBs2jDPOOIO3336bCRMmOFDMbJvmUNkGPP/880D2bJX7779/s2mNjY0sW7asHs0yM9tqPqZiZma52eFDJSLq3YSKbQ99MLPtww4dKr169WLdunVdeqUcEaxbt45evXrVuylmZjv2MZXBgwfT3NxMS0tLm/VeeuWNqrXh2b/tUvE8evXqxeDBg3NojZlZZXboUOnZsydDhw5tt97E6fOq1oZyjzA2M+uKuuzuL0m9JM2VtELSbUoPvjczs/rpsqECnA40R8RBQF/guDq3x8xsh9eVQ2UcsCANLwLG1rEtZmYGqKue+STp58A3IuIBSecAh0XEZ4vqTAWmptF9gdU1aNoAYG0NllNP23sf3b+ub3vvYy37t1dENHS0clc+UL8WaH1UYx9K/AdHxCxgVi0bJakpIhprucxa29776P51fdt7H7fl/nXl3V8LgfFpeBzwYB3bYmZmdO1QuR3YQ9JK4GWykDEzszrqsru/ImIjMKne7Sihprvb6mR776P71/Vt733cZvvXZQ/Um5nZtqcr7/4yM7NtjEPFzMxy41DpAEkHSzq4k+/9Tt7tsY6TNFPSmHq3o1oK/zYlnSXprJznf4qk3fOcZyfakNt3SNItkoYUjO8u6ZS85m8OlY46OL22WkR8Pue2bJMqWZlJmihpYo7N2ZF0+m+zg04B6hoqVf4O7U7Wx04pFXid+SEjaUxh2JWbf7l6HZh/h9vU2WW0cqi0Q9KVwMXAxZIWS9pZ0g8lLZF0h6SdJB0qaV6q/5CkfQrev7hguJekOyU9Juk+SZXf937bcVZn3xgR8yNifh6NKP58gJ2AaZIeTZ9Xt/Q5zJH0S0l3S+pR6rOR9C5JP07z+l6a/0xJX0tlKyQNLFWvFor/NlPxh9Lf4LOSRqR6iyVdKGlFGi/Vr0GSHkn9vyyV/QKYCNwl6epa9atYa9/Sltg3C/unzK3ps3xA0m5ppTgzvWeIpFvKzHcycBcwMf19fGhr25Zj4I0BhnRg/iXr5ayiZThU2hER/wF8Hfh6RIwBzgWeiYhRwHPAv0TEr4Dfpy/owxHxXJnZTQVWRMRI4D7gwEraVmLluFuJlcV3JI1Kw9+UdKSk90r6maRlkmakaWMkfSu9/2upbGSa13JJJW/YKWlfSY8Ch6Qv5mdS+fGSHk+vY1PZT9I8T5D03wXz2Gy3jbItl2WSnpR06lb+t2zx+QBPRcRo4O/AicAIICLiI8CtQG9KfzZTgV+neQ2S1Pp57QuMBn5CduFtuXpVVeJvE2Ak2c1VrwBOTmWDgO7p5quUae/7ga+QXVB8Upr/eGA+8ImIuKj6PeqQ4v71I9tSGwV8ja3YqoqIO4FPAPMjYnREPL21jSkIvN0lLZD0EHBUKiv1PbtF0n+m8P5l+g7PBs4GviPpzlLzT8Nb1JP0vYLv13dbh8s4tujH0K6S5qe23NzGMrboR1scKltvf+CxNPxYGgf4LnAecF0b790PeDwN/zfQVGFbileOZ7PlyuLHZL82AQ4BlgAzgDsj4nDgZEn90/RPATMi4qtp/Dqyu0GPJ/vCbiEiVqcV9pPpizlbUjfg6vS+CcC1qfo0YCbwZeA/Ss0vvfdbZCuOo8lWFluj+PP5UcF4EzAM+BXwtKQ5wDFkYVPqs9kX+Fj6Yu8N7JGm3xrZufgvkG0JlatXD3dExJsFbQN4FbimoE6p9m4EvgjcQBay26rN+hcR64BbyH4InAGsL6pfq70BU4H7I+JoYFMqK/c92z39eFkNHBoRnyH7m/t8REwut4Ay9WYDn5bUHTiC7Oa65RT/GNoDuJ7sZrx7S3pvmWWU60dJDpWOeQPYNQ2vIvvwSP+uSsMzgUuB/2xjPr8BDk/DXyH7FV2J4pVjqZXFI2RbEUPIfsFvSvXOS/V6A+9L8/th0VbWUOBm4G627ss5AHg5Iv4aES8D6yX1jYg/AK8Az0ZEucdtDgDWRsT6iFhP9ge9NYo/n08Ah6Xxg4E16d+lEXFiWt5R
lP5sVgPfTlsBlwD/k6a/VrTMcvVqofBvU2zZNoDX0+feqlR7vwRcRbZyLLx4rXD+24LN+idpT+Cv6bP8M/Bx4E2g9QaI7R2r+0f/pIqeybQ3sCINP5H+Lfc9uzn9Wxj8nRIRy8h+SE0EHij6nIsV/xjaQBbEt5Ft4ZX7jpfrR0kOlY5ZAHxc0mPAr4ERkpYAw4FbJJ0AvBERl5DdOmZkmfncCBzcursI+EGF7SpeOT5P0coi/ZGtIVtJ3p3etxqYnur9F9mKHrZcIf2abHfRsWS3xWlLd/jHF7MF6Cepj6S+wLsj4pW05bQrMFxSuUdurgUGpF15uwJPtbPcYt+n6PMBPpLGG4B7gd8Dn5f0ONkXpInSn82NwAmSfkm2sv1DmWV2tF41FP5tdvSZ0qXaOyeV/xT4u6TWFcds4CZJT2jbPAb4J7K+LCPbun2A7MfWvmkX8KC23hwRL5H191Hg3ypoxx+AA9Lwoenfjn7PoOPhVqrefWR7Sma308bi5Z4L3ANMIdtaL7eMcv0oLSL86qIvsl8XvyDbbfMg2d2a7wJ+CcwDdkv1xpH9OumexgcC9wNLyVaePcgOzs0smv8osi2dJ4EvtNOW88l2M81O48endj1OFkrdgIfJVvSHAfMK3nsWcFbB+EcL3ntSvf+f/fKr3AtYnP4dACwGHk2vMWW+Z7cAQ9J7ZgJj0vCw9L5lwAeK518wvkU9sl23Te20s3BZZ6XXUcAz6Xu5BPhIqWWU6kdby/JtWszMuqh0Es51wKUR8eN6twd87y/bCpLOJjsZoND1EdHerjEzqwFJA8lOzin0QkR8umZtcKiYmVlefKDezMxy41AxM7PcOFTMzCw3DhUzM8vN/wf1Oi7gdtFZkQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Label columns of the dataset, listed explicitly instead of sliced by position\n",
    "# (train.columns[2:]): a later cell adds a 'length' column to `train`, so a positional\n",
    "# slice would pick it up as a category if this cell were re-run out of order.\n",
    "cats = pd.Index(['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'])\n",
    "\n",
    "w = 0.4  # bar width\n",
    "x = np.arange(len(cats))\n",
    "\n",
    "# Per-label number of comments without (0) and with (1) the label; a boolean sum\n",
    "# counts the matches without materialising a filtered copy of the frame.\n",
    "y1 = [int((train[cat] == 0).sum()) for cat in cats]\n",
    "lab1 = 'No'\n",
    "\n",
    "y2 = [int((train[cat] == 1).sum()) for cat in cats]\n",
    "lab2 = 'Yes'\n",
    "\n",
    "# Stacked bars: the 'Yes' counts sit on top of the 'No' counts.\n",
    "p1 = plt.bar(x, y1, w)\n",
    "p2 = plt.bar(x, y2, w, bottom=y1)\n",
    "\n",
    "# NOTE(review): the default matplotlib font may lack CJK glyphs, in which case the\n",
    "# Chinese axis label and title render as empty boxes - confirm in the saved figure.\n",
    "plt.ylabel('评论数')\n",
    "plt.title('不同类别评论数')\n",
    "plt.xticks(x, cats)\n",
    "plt.legend((p1[0], p2[0]), (lab1, lab2))\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1017      WOULDN'T BE THE FIRST TIME BITCH. FUCK YOU I'L...\n",
       "1312      SHUT UP, YOU FAT POOP, OR I WILL KICK YOUR ASS!!!\n",
       "7299      You're a stupid cunt \\n\\nFuck you dumb arse, y...\n",
       "13648     Bitch \\n\\nYou are a little bitch. I fuckin spe...\n",
       "13964     I am going to murder ZimZalaBim ST47 for being...\n",
       "22158     FUCK YOU!!!!!!!!!!!! YOU FUCKING NIGGER BAG OF...\n",
       "29968     u motherfukkin bitch i want to rape you smelly...\n",
       "32098     Fuck All Asyriac Nation \\n\\nQamishli belong to...\n",
       "33951     GO FUCK YOURSELF BITCH.  I HATE YOUR SOULD.  M...\n",
       "38513     AM GOING TO RAPE YOU IN THE ASS YOU FAT BITCH ...\n",
       "38578     fuck you honkey, why you hatin' on blacks? You...\n",
       "46155           So fuck off and die, rape fantasising sluts\n",
       "55160     ANYONE WHO SUPPORTS THIS IS FUCKING SICK. MEN ...\n",
       "59279     JEW \\n\\nGet the fuck out of here you jewish so...\n",
       "65075     FAGGOTS!  YO FUCKER IT WAS FUCKING HAYES YOU F...\n",
       "67247     YOURE A FRREAKING JEW, AND PLEAZE COMMIT SUICIDE.\n",
       "73821     Shut up you asswipe, we don't care. I'll decap...\n",
       "77139     Eat shit you fucking arse rapping jew fucking ...\n",
       "80612     I hit a jack off all day, after the rape my mo...\n",
       "82141     Help! \\n\\nIf you become an admin, I will fucki...\n",
       "99819     Glenn Beck is a faggot. He likes dick in his a...\n",
       "100222    Urgent!!!! \\n\\nMind yo own fucken buisiness st...\n",
       "106617    , I hope your retarded kids get anal raped and...\n",
       "116767    Go fuck yourself!!! you fucking delteted it an...\n",
       "118928    You god damn son of a bitch you fucking blocke...\n",
       "123794    She is a Motherfucker \\n\\nI will block you Mot...\n",
       "133551    and your little faggot boy Propol, pray, pray ...\n",
       "142513    faggot \\n\\nYou lil piece of shit I haven’t van...\n",
       "151131    LGBT \\n\\nyou little fuck , are you a fag , tha...\n",
       "157428    Ok.... \\n\\nBitch i swear to God i will fuckin ...\n",
       "159400    Shalom \\n\\nSemite, get the fuck out of here. I...\n",
       "Name: comment_text, dtype: object"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from functools import reduce\n",
    "\n",
    "# Boolean mask of the rows in which every label column equals 1, built by AND-ing\n",
    "# the per-label comparisons together; then show the text of those comments.\n",
    "mask = reduce(lambda acc, flag: acc & flag, (train[cat] == 1 for cat in cats))\n",
    "train.loc[mask, 'comment_text']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "语料单词数分布\n",
    "- 不同评论间单词个数差别很大，最少的不足5个单词，最多的超过200个单词"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Word count per comment (split on whitespace, count the pieces); stored in place\n",
    "# as a new 'length' column of `train`.\n",
    "train['length'] = train['comment_text'].str.split().map(len)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary statistics of the per-comment word counts.\n",
    "comment_lengths = train['length']\n",
    "median, mean = comment_lengths.median(), comment_lengths.mean()\n",
    "mode = comment_lengths.mode()[0]  # mode() may return several values; keep the first one"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "36.0 67.27352714465661 5\n"
     ]
    }
   ],
   "source": [
    "print(median, mean, mode)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlwAAAHxCAYAAAC4dmIdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOzde3xV1Z3//9c6JwkBhBAS7leVW7iImqigVBGr9gJ1CoTit7bAfEeczvSrnbY6034LHujQjq391l6c/qTTUdqxg0RsFUar9YJWAZVAuSOgoALRkCt3SHLW7499Eg/hJDlJzsk+e+f9fDzOg2Sftff+nKOVd9daey1jrUVEREREkifgdgEiIiIifqfAJSIiIpJkClwiIiIiSabAJSIiIpJkClwiIiIiSabAJSLSCsaYfzHGXOl2HSLiLQpcIj5mHLOMMZdFHetmjEmP0TZgjOka670YbXsYYxYaY/rG0XacMea3xpiJjY7nGmO+bozJieMa3zTGrDLGBKOOXWSMecsY87mWzk8UY8zngR8C34yjbQ9jzD8bY4LGmAeNMUXGmDRjzHeNMf0jbdYZY/53G2vJMMb0aOU5+caYHxtjLm3LPUWk7RS4RPzNAH8PPGGM6RY5tgs4Z4yx0S+gDjgFfLnhZGMWGGO+GuO6twOPAPPiqGEo8BXgeKPjnwJ+DgyL4xqbgduAn0UdmwGMBN5s6iRjzNhIUAtEfu8SHdpaI/L9PQRsBwqNMde0cEr9d/k14Ezk9WXgLiBsjOkOXA/sjfP+n4l8lgGRQxuA/9vKj/F5nH8fjrXyPBFpJwUuER+z1oZx/pLvxye9MnlAOs7//m8E3o/8nAZkAo9HXWIQ8O/GmBH1B4wxXYHvAUeAmcaYLi2UMRA4aK19r9HxzwMvW2s3x/E51gGzOT9w/R/gIWtteTOn7gWGA/8Q+f0RoLZx2Ix6/Vsz1/oPoDdwC05QXG2MGdtM+3ScYPp7nDAbBrYCdwDVwA1ADZBtjJkeec2MBLHzRALjMpzvsjRyeDnwv40xPWPd3BgzwhgzxhhziTFmuDFmeOTeTwHd649FvS6JtL/g/iKSANZavfTSy+cvnJ6gQOTnzwKXR36eihOGwOkNexAYE3VeANgC/DDq2BLgI6AP8CzwRyAzxj3HAzbG6x+Arjihowo4GPX6fqNrjGniGrFetzfx2ccB04Eg0K3+e2j0GQ3wGLAkxvkB4Jc44ehzkWNB4A9ABfCZJu77x2ZqfQz4BfBupN0LkePPAf1iXOse4ARwadSxrsBh4NdN3H8zcBanZ7EKp4etLvJzrNfxSPsb3P73VS+9/PhKQ0R8KdIrkmGtPWOt3Rf11mycnqvPNDrleuBuonqRrLVhY8xnrbUfRa45Had3a6a19qgx5nZgI/CKMeYOa+27Udc7E/nz4qhjK3H+Ui8ETuP0ANXvL/a9qJ/rVdVfw1p7MFLDQ0Ava+38qM96MHLd6M9/BzT8N+4vwBTgT8BZY0x00wycEErj+xtjBgG/AT4N/K219tlIL9juyGf4HfCcMeZpnLBYHHX67TjB7HHgOmAt8BLwAVAMvAf8H2vtE8aYPGCjtfazNGKMyQceAO6O/n6ttaeNMd8EVhpjXrPW/i76PGvtlVHXSMcJYM9Za++LzKc7YK3V0KJIB9GQooh/XQacjhouqx8q+gFwS/RE+oivAUXW2g8jE+gzAKLC1hSc4bFfWWufjrxXPzQGsCsyOT438ntdpM3B+hdOCAvgzD36qbX2FWvtOusMGZ4FyhrVVNOKzxtu9Ptc4E6cAHmNtfZVa21Xa22vRq9u1tpXo0+MPDxwP06wmgzMsNb+NvL2TcBYa22ttfZ24B9xhhk3GWPejhpizQVWA1k4wSwdZ2j3V8Df4PQQ1ofEXsDRxh/IGDMKJ6itstYub/y+tfYJ4NfAY8aYv2vmu/lnIBv4fuT3jcDlzbQXkQRTD5eI
f23DmZM1EKc35SyAtfZdY8xvgb5ALTT0gFyCMy8KnLC2JaonaBLwZ+BV4DVjTLG1Nj9yvVJjzPXAd4ErrbX1oclGrn1RVE3ByPHlOBP5+9cHOpzQUcX56nucDjTqlcIY0+yEfWvt9Ei7dbQuuIETDAcD64G7rLXvR713mk9677DW/rsxZg3O599ora0PUXcDH+KEnc9HzvkjMAJnPt27OOHtj5F7HW70+W7C6RF8H1hijMnmwh5AgO/ghKlfG2NuA75krT0VdZ3PAiFgDhA0xvSKfB8m8nO9U9bac3F9OyLSagpcIj5lnQnzZyNPIGKtrTXGpOH0tCyw1lpjzNTIezXA1ZFlC7rzSVjLw5nDtQmYhjPp+7M4vTdEesH+H/C8tXaJaZyKHI2fTvyNtfYnxpgnI+8tiBzP5sLAVb9ERTxDis312NdE2o3B6d2J9mlr7aboA5Hv5mvW2toY1wrTKPhYaz/E6SGsrycI3I8Tsm4HfgSMiASav4/0go0F1hpj7sGZ7/Zu1PkBYD7OXLk50e81YSLOHLj9jcLWVGAVTtBd3eicdY1+/zrwcAv3EZE20pCiSOcyHWe5gnAkiL0CDItaGqIGZ3J2z6ieGqy1ddbaDZG/zOsAa4yZhNMDNAunRwxrbawemCFRr+iw8xPgy8aYgZHfewGVjc7Nivx5IKrGe4B5jZa0GAa09LQkwDmgqn44ESekxApVNBG24vUp4CTOd/VfOEOJxyP1rrLWnrXWbsEJmDfj9CC+HXXvsLX2K8A1wAGcCfL1Q7UTrbXGWmv4ZDj3PWvtvdbaR+qvEZlf9yzOkGM1cEXUeY1/f5VGc+BEJLEUuEQ6iUivyUtAdyAY+Yv2RuD9qL94g5H3q1u43GCcyeD/AQyz1i5rqqG19lD9i6i/1K21G3Amco+LHLqgh8tau7e+tqgafwasaHzcWvvfcXwNjed5JctfcJ6IHIPzmfpFan8NZ05WvSeARTjB6cXGF7HWnrKOMzi9YLXA/qgmg4BKa+2J6POMsyDqd4BHgW/FWXN7AqaItEBDiiKdx0NA2Fr7jaYaRIYhTzU+bozph9NTMz1y6CNglLW2rp01TY4M33XBGcJsCFyRnq8zXBiSugAZjeYfgfN/ILsCJZHP0fgz3Mr5YaVemxZCbU7kezltjPkWzmf4J2PMhzg9gU9GNV2O83TmS42eJI2lEKjvZaw3COepx8b3P26MucZaexog9kiviHQkBS4R/6sPJjdy4VIQ8foDUGytrV9SobZx2DLG/C3whLX2ZP2hyPHGw4wr6n+IGoLMjvwZ3cP1V5wA1TjUdY1cu/GWPgGcMNav0XW6RNrOBv4XMNQYU/9+D5xlIVojjfhD2l04a3j9b5z5UedwFkz9prV2N878tTTgEmNMjm1iEVdjzA3AQpz5YNEG4UzMv0B92IqquTk9UQ+XSFIpcIn432dw5gF92lr7caP3gkSCURMGR/48jDN3CpwJ47nGmNz6JxIj63P9BmcOWP16UPUT3vOirvfbqOMYZxPoSZHXR9bahgn21tqY+zQaY34OZFlrW9xWKLKswmU4IWwKzuf9wFo7PPL+vUD9d9KT2E8BNtY98opHNk6v4P/CeQL0LZzwlR5Zu2tSpK4fAxsja5k1bFUUGQa+C6d38j+stauj3gsCV+MMy7akcajMiNQwPXLtYTgPRIhIkmgOl4jPWWv/DWcdqsZhC5zeosxmTr8M54nFr0YN0+3EmVh/NGrS+h9xtoyJnkd1Cmeu1Z76F/DvOD1X9dJw1uTqg7NuVjwyiG+CPDjh7k3gOmvtLpwhxYaeMWvtj4HRxph9OGtj7Yrjmt2Bi5prYJyNtZ8BDgEFwPXW2l9aa9/Cmd+1GSfgXW2tXY+zT2QZziKmfSPXuBRna6JfAP9qrf2HqOv/Cuf7ncz5c8Ka8jDnr/PVJfLagPPPZLy1dnsc1xGRNjKxHyoSEXEYY7o2Gp7yFeNsSv0V4A1r7Y4EXnc6sKN+OYuo4wHg
s9ba/2l0PAPob639IOrYP0bq+mujtlfgPN34vLVWPVMiHqDAJSIiIpJkzQ4pGmMyjTFrjTFbjTG/i7WoYVNtjDErjDEbjTHPRBZTvMoYc8gY83rkNTpZH0pEREQklbQ0h+sO4JC1diLO5M+b42kT2XMtzVo7CWeewi2R935lrZ0Seb2TsE8hIiIiksJaClzTcPZPA3gZ57HyeNp8jLM4YfQ9soFZxpi3jDGrm9gCRERERMR3WloWIodPVpw+BsQaBrygTf0CfsaYL+IsWvgCzl5fi6y1/2OMWY+zsvK6xhczxizEWW+G7t27548ZMya+T3Iqsl9ut9zm2wkAdZXODirB7OwWWkrlGee7ys7UdyUiIk0rLi4us9b2ifVeS4GrjE/2MsuK/B5XG2PMF4C7gRmRTXMPAvVPAB0EYq6xY61djrP6MgUFBXbTpk2xmomIiIikFGPM+02919KQ4ks486/AGTp8JZ42xpj+wL3A9KiFDL8JzI08Ej2eT8KXiIiIiK+1FLgeBwYZY7YBFcC7xpgHW2jzEjAPGAA8H3ki8W9xtrdYgLMI4R8iixAmzqZHnZfEpfKJVVQ+scrtMjyhaG8RRXuL3C5DREQ8LKXX4WrVkGIoMqoZqm6+nQCwe4yz20rent0uV5L6JqyYAMD2eVqIW0REmmaMKbbWFsR6T3spioiIpJiamhoOHTrEmTNn3C5FYsjMzGTw4MGkp6e33DhCgUtERCTFHDp0iB49ejB8+HC0ilJqsdZSXl7OoUOHuPjii+M+T5tXi4iIpJgzZ86Qk5OjsJWCjDHk5OS0uvdRgUtERCQFKWylrrb8s1HgEhEREUkyBS4RERFpt1AoxLp16xJ+3R//+MeMGzeOKVOmcMsttzTZbt++feTn5zN+/Hh+8YtfNHnMLf6ZNK/lIFpFy0HET8tBiIgkVl1dHTfddNN5x77+9a8ze/bsC9pWVlayfPlyrrvuumavuWTJEu69916+8IUvkJeXx7x582Ie69mzZ0I/S7z8E7hERET8qn6tyVimPwQFC5yfNz0Ka7/RzHXi65yYOnUqAwYM4OjRo9TV1TFp0iRCoRDz58/ngw8+YNiwYTz22GOcOnWKwsJCzp07RzAYZOrUqXz88cfMmzePyspK/uZv/obvfOc7F1w/GAzG3RtWWVnJ9773PcrKyvjqV7/KvffeG7Pdyy+/zM9+9jO6devGxIkT2bBhQ8xjt956a1z3TTQNKYqIiMgFfvjDH/LBBx+wZs0aNm7cyK9//WvGjh3LG2+8wciRI3n00UdZvnw5n/vc53j11VcJBAIN582dO5c333yTp59+mvLy8nbVcdNNN/Hwww/zxhtvsGzZMs6ePRuzXXl5OVlZTjDt2bMnFRUVMY+5xT89XI9c7/x512vu1uERB2bOAuDip1a7XEnqm7NmDgCrZmgrJBFxSbzTZgoWfNLb1U7Dhw9n4MCBXHTRRVhr2bVrFzNnzgRg8uTJPPfcc5w9e5Y5c5z/Rl511VUAvPPOO2zYsIHHHnuMEydOcOTIEXJycs67dl1dHTfccMN5x+655x4KCwsvqGPy5MkMGjQIgG7dulFdXU3fvn0vaJebm0t1dTU5OTlUV1eTm5sb85hb/BO4Sra6XYGnnNmV2K0s/Wx3hea7iYiMGzeOjRs38ulPf5qNGzcybtw4ysrK2LFjB9OmTWPz5s3ceuutjB49mttuu40bb7yRxx57jOzs7AuuFQwGef311+O675w5c/j9739PRkYGgUCAPn36xGx300038cILL3DbbbexdetWJk2aFPOYWzSkKCIiIi36u7/7O3bu3Ml1113H3r17mT9/PgsXLuSpp55iypQpnDx5EoB/+Zd/4cc//jGTJk3ixRdfpH///u267w9+8ANmz57N9OnT+dWvfoUxhqeeeorHH3/8vHaLFy/mwQcf5KqrruK+++6jR48eMY+5RZtXd1LavDp+2rxaRDra7t27ycvLc7sMaUasf0bNbV6tHi4R
ERGRJFPgEhEREUkyBS4RERGRJPPPU4pXznO7Ak/pFePRW4lt1shZbpcgIiIe55/A9YWfu12Bpwz4/lK3S/CM0LUht0sQERGP05CiiIiIpLRvfetb5OfnM3/+fAB++tOfMmXKFKZMmULPnj15//33Y55XWlrKtGnTuOKKK1i6dGmrzk00/wSuI1ucl8Tl9I6dnN6x0+0yPGFn+U52luu7EhFJpMLCQqZOndrw+uUvfxmz3SuvvEL37t0pLi5m+PDhVFVV8U//9E+8/vrrrFu3jn79+jFkyJCY5y5fvpzbb7+dDRs28MADD3DixIm4z000/wwpLp/q/Kl1uOJyMLIju9bhatnctXMBrcMlIu6pXw8wlsWTF1M4ypmXW7S3iKUbmp4yEu9/x2JtXr1o0SK++tWvUlJSwuWXX87DDz9MSUkJc+bMoba2lmnTprFs2TLmz5/PpZdeyrPPPosxhpdffpnMzMwL7lFUVBRXLS+++CL79+/nmmuuYfLkyfTq1avhvddee43Jkyc37OPY2JQpUxg5ciT1a45Grz3a0rmJ5p8eLhEREUmYxptXL1++nPHjx/PGG29QUlLCtm3b+PDDD1m2bBkvvPACzzzzTMO5VVVVbNiwgdGjR7N58+Z21XH06FHGjx/P+vXreeqpp/jggw8a3luzZg3Tp09v8typU6cyaNAgvvOd73DPPfect9J8S+cmmn96uERERHwq3p6pwlGFDb1d7dV48+p33nmH9evXs27dOqqqqjh8+DADBw5k2bJldO/enRMnTjScu2CBs4H2sGHDOHfuXMzrz5w5k9LS0obf58yZw913331Bu549ezJ69GiCwSBDhgzhyJEjDB06FIDnn3+eUCjU7Of453/+Z+rq6vjBD35w3vF4zk0kBS4RERFp0ejRo7n66qtZsGABTz/9NEOGDOGBBx7gvvvuY+LEiVx22WUNbS+66KIWr/fUU0/Fdd/8/Hw2bdrErFmz+OCDDxg2bBjgbK3Tv39/srKymjx37dq1VFZWsnz58vOOx3NuomlIUURERFp055138uyzz3LttdeyfPlyhg4dyowZM7jzzjv54he/SPfu3Tly5EjC7ztr1iwOHjzIVVddxVe+8hUGDBgAXDgkGGtD64cffpji4uKGpxK3bNkS89yOoM2rOyltXh0/bV4tIh1Nm1enPm1eLSIiIpJi/DOHa+E6tyvwlOFPPul2CZ6xcvpKt0sQERGP80/gGniF2xV4Stfx49wuwTPG5ei7EhGR9vFP4AKin+7swCc9RURERJrlnzlcz9zNDC5cv0NiK1m0mJJFi90uwxNC60OE1ofcLkNERDzMP4Fr8wryWeF2FZ5RVVREVZzbKnR2q/etZvW+1W6XISKS0ubPn8/BgwcB+Oijjy5YaDTZDh48yA033EBBQQGPPPIIADU1NSxYsIDLLruM733ve02eu3HjRi677DLGjx/P6tWrmzzWHv4JXCIiIpIS+vfvz3e/+912X6euru68Da6nTp3Kk0089PXggw+ybNkyNmzYwLJlywD4r//6Lz71qU9RXFzMuXPnaGoprG9/+9ssX76cF198kW9/+9tYa2Meaw9fzeESERHxo/q1E2Ppv2QJ2V+aA0DlE6v46P77m2wb79qL8W5e/f7773P77bfTtWtXTp482XD+wYMHCYVCPPbYYwDs3LmT+fPnEwgEmD9/Pl/72teYOnUqn//85ykqKqJ///7n7cVYLxgMsm7durhqXrJkCb169aK8vJzu3bsDzsbXGRkZ/OIXv+COO+7AGHPBeWfPnmX79u1MmjQJgB49erB3796Yx0aPHh1XLbGoh0tEREQuEM/m1Q888AD33nsvf/rTn6iqqmryWocPH+Y3v/kNa9eu5T//8z8bjmdmZvLWW29x/Pjxdq9Sn5OTw0cffUReXh4LFy4EnI2vp02bxquvvsq//uu/xtzXsaKi4rxNrXv27NnksfZQD5eIiEiKi7dnKvtLcxp6u9orns2r33vv
PSZOnEh6ejpXXNH08kzBYJDvfve75ObmUltb23C8pU2u6+rquOGGG847ds8991BYeOEG3YcPH2bQoEHs27evIXTVb3zds2dPunXrRkVFBf379z/vvN69e3P8+PGG36urq2Mey83NbeEba54Cl4iIiLQo1ubVQ4cOZceOHQwdOpRt27Y1eW4oFGLlypUEg0FuueWWhuMtbXIdDAZ5/fXX46qvsLCQVatW0adPH2prawmHww0bX48ePZqzZ8/GDE1dunRh4sSJbNiwgeHDh3PixAlGjRp1wbERI0bEVUdT/BO4BkzkSInbRXhH5tixbpfgGXm9tZ+ZiMidd97J/Pnz+fWvf012djb//d//zX333ceXv/xlfvKTn5CZmdnkuTNnzuTWW2/lkksuoba2ljNnziS8vmXLljFr1ixqampYtGgRPXr04O///u+5/fbbeeSRR1i6dClpaWk8/PDDjBkzhptuuqnh3B/96EfceeedhMNhfvKTn2CMiXmsPfyzeTVa+FRERPxBm1enPm1eLSIiIpJiFLhERERSUCqPQHV2bfln45/AFcoiRJbbVXjG7jF5za7rIp+YsGICE1ZMcLsMEelEMjMzKS8vV+hKQdZaysvLm52zFot/Js2LiIj4xODBgzl06BBHjx51uxSJITMzk8GDB7fqHAUuERGRFJOens7FF1/sdhmSQP4ZUhQRERFJUQpcIiIiIkmmwCUiIiKSZApcIiIiIknmn0nz0x9izVq3i/CO/kuWuF2CZyyevNjtEkRExOP8E7gKFlCswBW3RO0m3xkUjrpwV3oREZHW0JCiiIiISJL5J3BtepR8HnW7Cs+ofGIVlU+scrsMTyjaW0TR3iK3yxAREQ8zqbxtQEFBgd20aVN8jUPOtj4hqs8/HEpwUT5Rv61P3p7dLleS+uq39dk+b7vLlYiISCozxhRbawtiveefHi4RERGRFKXAJSIiIpJkClwiIiIiSabAJSIiIpJkClwiIiIiSabAJSIiIpJk/llpPlStJSBaQctBxE/LQYiISHuph0tEREQkyRS4RERERJLMP4HrketZyPVuV+EZB2bO4sDMWW6X4Qlz1sxhzhpt9i0iIm3nnzlcJVsZ6HYNHnJm1y63S/CM3RWa7yYiIu3jnx4uERERkRSlwCUiIiKSZApcIiIiIkmmwCUiIiKSZApcIiIiIknmn6cUr5xH8Wa3i/COXoWFbpfgGbNGavkMERFpH/8Eri/8nDUKXHEb8P2lbpfgGaFrQ26XICIiHqchRREREZEk80/gOrKFAWxxuwrPOL1jJ6d37HS7DE/YWb6TneX6rkREpO38M6S4fCp3ASGq3a7EEw7Ong1A3h6tot6SuWvnArB93naXKxEREa/yTw+XiIiISIpS4BIRERFJMgUuERERkSRT4BIRERFJMgUuERERkSRrNnAZYzKNMWuNMVuNMb8zxph42xhjVhhjNhpjnjHGpMVzLRERERE/amlZiDuAQ9ba6caYtcDNwAsttTHGnALSrLWTjDHrgFuAgXFcq+0WruOR5Qm7mu8Nf/JJt0vwjJXTV7pdgoiIeFxLQ4rTgD9Hfn4ZuDHONh8DP2t0j3iu1XYDr6CEKxJ6ST/rOn4cXcePc7sMTxiXM45xOfquRESk7Vrq4cqBhpVEjwGj42ljrd0HYIz5IhDG6cm6J45rYYxZCCwEGDp0aFwfojmhUOyfRURERDpKSz1cZUBW5OesyO9xtTHGfAG4G5hhra2N81pYa5dbawustQV9+vSJ93PAM3czg7vjb9/JlSxaTMmixW6X4Qmh9SFC60NulyEiIh7WUuB6CWf+FThDgq/E08YY0x+4F5hurT3eimu13eYV5LMioZf0s6qiIqqKitwuwxNW71vN6n2r3S5DREQ8rKXA9TgwyBizDagA3jXGPNhCm5eAecAA4HljzOvGmL9top2IiIiI7zU7h8taexaY3ujwt+No80Dk1VjjdiIiIiK+p4VPRURERJJMgUtEREQk
yRS4RERERJKspXW4vGPARI6UuF2Ed2SOHet2CZ6R1zvP7RJERMTj/BO47nqN5SG3i/COi5/SMgfxWjVjldsliIiIx2lIUURERCTJFLhEREREksw/gSuURahh5yBpye4xeeweo7lJ8ZiwYgITVkxwuwwREfEw/wQuERERkRSlwCUiIiKSZApcIiIiIkmmwCUiIiKSZApcIiIiIkmmwCUiIiKSZP5ZaX76Q6xZ63YR3tF/yRK3S/CMxZMXu12CiIh4nLHWul1DkwoKCuymTZtabBcKte66rW0vIiIi0hJjTLG1tiDWe51ySDEUUugSERGRjuObwJXPo+TzqNtleEblE6uofEKbMsejaG8RRXuL3C5DREQ8zDdzuGbwDQCKWeByJd7w0f33A5D9pTkuV5L6lm5YCkDhqEKXKxEREa/yTQ+XiIiISKpS4BIRERFJMgUuERERkSRT4BIRERFJMgUuERERkSRT4BIRERFJMt8sCxGi2u0SPCVvz263S/CM7fO2u12CiIh4nHq4RERERJJMgUtEREQkyXwTuBZyPQu5vs3nd7b9FQ/MnMWBmbPcLsMT5qyZw5w1WpFfRETazjdzuAay1e0SPOXMrl1ul+AZuys0301ERNrHN4GrLTpTj5aIiIi4xzdDiiIiIiKpSoFLREREJMkUuERERESSTIFLREREJMl8M2m+mHlul+ApvQoL3S7BM2aN1PIZIiLSPr4JXGv4udsleMqA7y91uwTPCF0bcrsEERHxOA0pioiIiCSZbwLXALYwgC1ul+EZp3fs5PSOnW6X4Qk7y3eys1zflYiItJ1vhhTvYioAIardLcQjDs6eDUDeHq2i3pK5a+cCsH3edpcrERERr/JND5eIiIhIqlLgEhEREUkyBa5mhELab1FERETazzdzuJIpOnQpgImIiEhrKXA1okAlIiIiiaYhRREREZEk800P1yOsc7sETxn+5JNul+AZK6evdLsEERHxON8ErhKucLsET+k6fpzbJXjGuBx9VyIi0j4aUhQRERFJMt8ErhnczQzudrsMzyhZtJiSRYvdLsMTQutDhNaH3C5DREQ8zDeBK58V5LPC7TI8o6qoiKqiIrfL8ITV+1azet9qt8sQEREP803gEhEREUlVCo016xUAACAASURBVFwiIiIiSabAJSIiIpJkClwiIiIiSabAJSIiIpJkvln49AgT3S7BUzLHjnW7BM/I653ndgkiIuJxxlrrdg1NKigosJs2bWqxnRsbTmuTaxEREYlmjCm21hbEek9Dim0UCil0iYiISHwUuERERESSzDeBK0QWIbLcLsMzdo/JY/cYzU2Kx4QVE5iwYoLbZYiIiIf5JnCJiIiIpCoFLhEREZEkU+ASERERSTIFLhEREZEkU+ASERERSTIFLhEREZEk883WPmt4yJX7Ri9+6qWFUPsvWeJ2CZ6xePJit0sQERGP803gKmaB2yV4SvaX5rhdgmcUjip0uwQREfE4DSmKiIiIJJlvAlc+j5LPo26X4RmVT6yi8olVbpfhCUV7iyjaW+R2GSIi4mG+GVKcwTcADS3G66P77wc0tBiPpRuWAhpaFBGRtvNND5eIiIhIqlLgEhEREUkyBS4RERGRJFPgEhEREUkyBa4kC4W8tSCqiIiIJJ4Cl4iIiEiS+WZZiBDVbpfgKXl7drtdgmdsn7fd7RJERMTjmu3hMsZkGmPWGmO2GmN+Z4wx8bYxxqQbY9ZEtbvKGHPIGPN65DU68R9HREREJPW01MN1B3DIWjvdGLMWuBl4oaU2xpi/AG8Co6LaZQO/stYuS1DtKUtztkRERCRaS4FrGrA68vPLwI1cGLguaGOtfQG4zBizP6pdNjDLGHMb8CEw21pr21N8tIVcD8ByXkvUJVvNS0HrwMxZAFz81OoWWsqcNc5q/KtmaCskERFpm5YCVw40TI46BsQaBoynDcB+YJG19n+MMeuBG4B1jRsZYxYCCwGGDh3aQnmfGMjWuNsKnNm1y+0S
PGN3hea7iYhI+7T0lGIZkBX5OSvye1vaABwEXoz6uW+sRtba5dbaAmttQZ8+fVooT0RERCT1tRS4XgJuifw8DXiljW0AvgnMNcYEgPHAjtaVKiIiIuJNLQWux4FBxphtQAXwrjHmwRbavNTEtX4JLMCZTP8Ha63GtERERKRTaHYOl7X2LDC90eFvx9Gm/r0RUT+XAFPbVKWIiIiIh2mleRck8OFMERER8QDfrDRfzDy3S4jL8TM13PDjddw/Yyy3XT7ItTp6FRa6dm+vmTVyltsliIiIx/kmcK3h526XEJfi9yupOHmO/3zjoKuBa8D3l7p2b68JXRtyuwQREfE43wSuVFe/KOqWmkoAtn5Yxd6PjzOqXw/3ihIREZEO4Zs5XAPYwgC2uF1Gi0ptJUN7dyMtYCja9KFrdZzesZPTO3a6dn8v2Vm+k53l+q5ERKTtfBO47mIqd6X4Q5BhG+ZouIobR/dh2pi+/GHLYWrqwq7UcnD2bA7Onu3Kvb1m7tq5zF071+0yRETEw3wTuLyg0h6nljquHJZNYcEQyk6cY907R90uS0RERJJMc7g6UGnYmb/1lz/0phtdyL2oC6s2fcjNY/u5XJmIiIgkk3q4OlBpuJJuZNKdTAImwMwrB/HKnlLKTpx1uzQRERFJIgWuDlQarqRvIBtjDACF+YOpDVv+uOWwy5WJiIhIMilwdZCT9jQnOU3fQHbDsZH9ejBxSC+KNh3S6vMiIiI+psDVQernb0UHLnB6ud75+DjbD1e7UZaIiIh0AN9Mmn+EdW6X0KzScCVpBOltep53fMbEgXx/7S5WbfqQywb36rB6hj/5ZIfdy+tWTl/pdgkiIuJxvglcJVzhdgnNKg1XkhvIImDO71TM6prOZ8b355m/HuF7nx9LZnqwQ+rpOn5ch9zHD8bl6LsSEZH20ZBiB6ixtVTYY/Q12THfL8wfwrEztbyw6+MOrkxEREQ6gm8C1wzuZgZ3u11GTGXhaiyWvoHeMd+/9tIcBvXq2qFb/ZQsWkzJosUddj8vC60PEVofcrsMERHxMN8ErnxWkM8Kt8uIqdRWANAnEHuOViBgmJU/mNf3l3G46nSH1FRVVERVUVGH3MvrVu9bzep9q90uQ0REPMw3gSuVHQ1X0stcRBeT0WSbwvzBWAtPFR/qwMpERESkIyhwJZm1ltJwJX0Csedv1RvSuxuTLunNk5u1JpeIiIjf+OYpxVRVbU9wjtomJ8wDhELOn2l1Q3i/ZitvHajgmktyOqZAERERSTr1cCVZUwuexjIs0J+LuqSxapOGFUVERPxEPVxJVmor6UIGPU33C96r79mql27SmD5hAE//9QhLbhvHRV3Smmzf+FwRERFJXb4JXEeY6HYJMTXesLolx7YO5nTNhzy7rYQ5Vw1JWl2ZY8cm7dp+k9c7z+0SRETE43wTuJbzmtslXOCMPcsxe5KRwfiDUx+TTU/TnaLiD5MauC5+SsscxGvVjFVulyAiIh6nOVxJ1Jr5W/WMMYwMDuHtg5W8d/REskoTERGRDqTAlUSl4UoCGHJMVqvOuzQ4iICBJ7Uml4iIiC/4JnCFyCJE64JNspWGK8kxWaSZ1m1I3c1kcsOoPjy1+TB14eSsybV7TB67x2huUjwmrJjAhBUT3C5DREQ8zDeBK9XU2TrKbHWrhhOjFRYM4aNjZ/jLvqMJrkxEREQ6mgJXkpTbY4QJtzlw3ZTXl+xu6RRpWFFERMTzFLiSpH7CfEtb+jSlS1qQ2y4fxJ93fkzVqXOJLE1EREQ6mAJXkpSGK+hhutHNZLb5GoUFgzlXF+bpvx5JYGUiIiLS0RS4ksDZsLqq2f0T4zFuYBZjB/SkqPjDBFUmIiIiblDgSoLj9hRnONvm4cRocwoGs+PwMXYdOXbe8VBI2/uIiIh4hW9Wml/DQ26X0OCodeZv9WtH4KoPU2fsIDKCeyK9XOPaX1xE/yVLEnYtv1s8ebHbJYiIiMf5JnAVs8DtEhp8HK4knTSy
TI92XyvTZPDpsX15+q9H+LzNI2gS0ymZ/aU5CblOZ1A4qtDtEkRExOM0pJgER8OV9AlkE4hzw+qWFOYPoeLkOQ6FP07I9URERKRj+SZw5fMo+TzqdhmctTVU2uNtXn8rllf+O5dudGFfXeLW5Kp8YhWVT2hT5ngU7S2iaG+R22WIiIiH+WZIcQbfANwfWiwLVwG0+wnFaAET4NLgYHbUvcspe6ZdS03U++j++wENLcZj6YalgIYWRUSk7XzTw5UqSsMVGKBPoFdCrzsiOBgLvFt3+LzjelpRREQk9SlwJViprSTb9CTdJLbzMCtwEX1NNvvrPsTa5GxoLSIiIsmhwJVAYRvmaLgqofO3oo0IDqbanuSorUrK9UVERCQ5FLgSqNIep5a6pAWui4MDSSPI/jqtPC8iIuIlClwJVL9hdbICV7pJY1igPwfqSqi1dUm5h4iIiCSeAlcClYYr6UYm3ematHuMCA6hhlreD5ck7R4iIiKSWL5ZFiJEtdslUBqupG8gG5OgBU9j6R/oTQ/Tjf11h7g0OLjN18nbszuBVfnb9nnb3S5BREQ8Tj1cCXLSnuYkp5M2nFjPGMOI4GBKwuUcD59K6r1EREQkMRS4EiTZ87ei1fds7U/gyvMiIiKSPL4JXAu5noVc79r9S8OVBAnQ2/RM+r0uMl0ZEMhlf92hNq/JdWDmLA7MnJXgyvxpzpo5zFmjFflFRKTtfDOHayBbXb1/abiSXNOLgOmYDDsyOJjXwn/lo3A5A4K5rT7/zK5dSajKn3ZXaL6biIi0j296uNxUY2upsMfo1wHDifWGBvqTTlpCN7QWERGR5FDgSoCycDUWS58ODFxpJsglwYG8Hy7hnK3psPuKiIhI6/lmSNFNR23HTZiPNiI4hHfqPuBAXQmh0NCG49rMWkREJLWohysBSsMVZJmL6GIyOvS+uSaLXuYibfUjIiKS4hS42slaS2kSN6xujrMm1xCO2iqqwsc7/P4iIiISH98MKRYzz5X7VtsTnKOGvqbjAxfApcFBFNfuYX/dIQoCeXGf16uwMIlV+cuskVo+Q0RE2sc3gWsNP3flvh254GksXU0XBgf68m7dYa5MGx33shQDvr80yZX5R+jakNsliIiIx2lIsZ1KbSVdyKCn6e5aDSODgznNWQ6Hj7pWg4iIiDTNN4FrAFsYwJYOv29HbFjdksGBvmSS0aqtfk7v2MnpHTuTWJV/7Czfyc5yfVciItJ2vhlSvIupAISo7rB7nrFnOWZPMjI4pMPuGUvABLg0OIhddQc5Y88CXVo85+Ds2QDk7dEq6i2Zu3YuANvnbXe5EhER8Srf9HC5oTRcBbg3fyvaiOAQLJZ36464XYqIiIg0osDVDqXhCgIYckyW26WQHehBrslif92Hbd7QWkRERJJDgasdSsOV5Jgs0kzQ7VIAp5er0h7n7vuPabV5ERGRFKLA1UZ1to4yW50Sw4n1Lg4OJECAfVp5XkREJKUocLVRuT1GmHBKBa4uJp1hgf68V3eEWlvndjkiIiISocDVRvULnvZJocAFzppc56jhw/DHbpciIiIiEb5ZFuIR1nXo/Y6GK+lhutHNZHbofVvSP5BLdzLZX3eIUGhgw/HGc7qGP/lkxxbmYSunr3S7BBER8TjfBK4Sruiwe1lr+ThcycBAbofdM14BY7g0OJjtdfs5aU/T3XSN2a7r+HEdXJl3jcvRdyUiIu2jIcU2OGFPc4azKTV/K9qI4GAs8G7dYbdLEREREXwUuGZwNzO4u0PuVWorgNRY8DSWnoHu9DO92dfMmlwlixZTsmhxB1fmTaH1IULrQ26XISIiHuabwJXPCvJZ0SH3Kg1Xkk4avUyPDrlfW4xMG8Jxe4pSWxnz/aqiIqqKijq4Km9avW81q/etdrsMERHxMN8Ero5UGq6kT6AXARc3rG7JsEB/0giyX2tyiYiIuE6Bq5XO2Roq7XH6Bnq7XUqz0k0aFwcHcKCuhBpb63Y5IiIinZoCVysd
rd+w2qTm/K1oI4JDqKWOg3UlbpciIiLSqSlwtVJpuBID9An0cruUFvU12fQ03Xmn7gPCYW1oLSIi4hYFrlYqtRVkm56km9RfwswYw4TgpZTZKv7rzffdLkdERKTTSv3UEKcjTEz6PcI2zNFwFSOCg5N+r0QZERzMgXAJP3x2D1NH9WVoTjcAMseOdbky78jrned2CSIi4nG+CVzLeS3p96i0x6mlLmXX34rFGMN16RP4E69x75NbGXNkEsYYQk9pmYN4rZqxyu0SRETE4zSk2Ar1G1Z7KXABdDdd+d70PN48UMGeOg0tioiIdDQFrlYoDVfSjS50J/b+hKlsTsEQrh/Vh+LaPRwPn3K7HBERkU6l2cBljMk0xqw1xmw1xvzOmAtX+myqjTEm3RizpjXXao8QWYTISuQlL1AarqRvoDcJLr1DGGP4t5kTMBher9nK7jF57B6juUnxmLBiAhNWTHC7DBER8bCWerjuAA5ZaycC2cDN8bQxxnQFihu1j+daKeukPcNJTtPHY8OJ0Qb26spVaXl8HNkLUkRERDpGS4FrGvDnyM8vAzfG08Zae9paexlwqJXXSllHI/O3+nk4cAGMDA5hUKCP22WIiIh0Ki09pZgDVEd+PgaMbmObuNsZYxYCCwGGDh3aQnkd5+NwBUEC9DY93S6lTUIh509jDNemfzI8Fg5bAgHvDZGKiIh4SUs9XGXQMDEqK/J7W9rE3c5au9xaW2CtLejTJ3V6Yo6GK8k1vQgY7z9n0N18Mun/dxv11KKIiEiytZQeXgJuifw8DXiljW1a0y7l1No6yu0xzy0HEY/vP7OH98tPul2GiIiIr7UUuB4HBhljtgEVwLvGmAdbaPNSnNdqql3KKbNVWKwvA5fBcO+T27TXooiISBI1O4fLWnsWmN7o8LfjaFP/3oh42iXCGh5K1qU9u+Bpc94uWALA1WljeePANn638X3mXTvc3aJS1OLJi90uQUREPM43W/sUsyBp1y4NV5JlLqKLyUjaPTraeyPmADDCWtIvLeHfntvD1NF9GJbT3eXKUk/hqEK3SxAREY/z/gzwJLPWRhY89U/vVjRjDD+cOYG0gIYWRUREksU3gSufR8nn0YRft9qe4Bw19DX+ClyX7F/FJfudTZkHZHVl0fSxvHWggt9uOOhqXamoaG8RRXuL3C5DREQ8zDdDijP4BpD4oUU/zt8CuGrT/YAztBgKgbWDmTq6hAf+9A43jumrocUoSzcsBTS0KCIibeebHq5kKbWVdCGdnsbfAaRhaDGooUUREZFEU+BqQf38LS9uWN1aGloUERFJDgWuZpyx5zhmT9I30NvtUjpMYf5gpo7uwwN/ekcLooqIiCSIAlcz/Dp/qzkaWhQREUk8Ba5mlIYrCWDIMVktN/YRDS2KiIgklgJXM0rDFfQ2WaSZoNuldLjC/MHcqKFFERGRhPDNshAhqhN6vTobpsxWMyY4LKHXTRVPzN3d7PvO0OJl3PzTV7n3yW2svHMSgYD/HxyIZfu87W6XICIiHqceriaU22rChDvV/K3G+mdlNgwtrthw0O1yREREPEuBqwlHO+GE+Vg+GVrcw8EyDS2KiIi0hW8C10KuZyHXJ+x6peFKLjJd6WYyE3bNVHLz87O4+flZLbarH1pMDwa4b3XnfGpxzpo5zFkzx+0yRETEw3wzh2sgWxN2rfoNqwcEchJ2zVTTu3LXBcdCoQvbhULO0OLi6WO598ltrNhwkAXXXZzs8lLK7orm57uJiIi0xDeBK5FO2NOc5mynWvC0JbPzB/Ps9hIe+NMetj/fl56B7jEDmoiIiFxIgSuGUlsBaP4WRPd6GX74T85Ti2+c3cZnMiYBnfOpRRERkdbyzRyuRCoNV5JOGr1MD7dLSSn1Q4sf2wp21x10uxwRERHPUOCKoTRcSZ9ALwKdYMPq1pqdP5jBgb4U1+qpRRERkXgpcDVyztZQaY9rOLEJxhgmp08gQID7tNeiiIhIXHwzh6uYeQm5
ztFwFQB9jb8D17uXFLb53O4mk6vTxvLGwW08tv4gfzvF308tzhrZ8vIZIiIizfFN4FrDzxNyndJwJQbIDfRKyPVS1aarl7br/BHBwXQZ8RE/en4P08b0ZXhu9wRVlnpC14bcLkFERDxOQ4qNlNpKepmeZJh0t0tJacYYfvDFCc6CqBpaFBERaZZvAtcAtjCALe26RtiGORqupF8nmL+VXbGT7Iqd7bpG/6xM7p8xjrcOVvDY+oOJKSwF7Szfyc7y9n1XIiLSuflmSPEupgIQorrN16i0x6mljj6dIHDd8sJsAJ6YG/8q6rEWOp115SCe3V7Cj57fw41j+nKxD4cW566dC8D2edtdrkRERLzKNz1ciVBav2G1zyfMJ0ooBEuWGHIO1A8tbtXQooiISAwKXFGOhivpShcuMl3dLsVTuhtnaPHtg5W+HloUERFpKwWuKB+HK+kbyMZowdNWm3XlIKaN6cuPnt/Du0dPuF2OiIhISlHgijhpz3CS01rwtI2MMfxw5gS6ZaTxj49v5kxNndsliYiIpAwFroij9fO3Ar1drsS7+vXM5MpzE9nz0XFmhPRUn4iISD0FrojScCVBAvQ2Pd0uxdMGB/tyWfBS9tV9yB+2HHK7HBERkZTgm2UhHmFdu84vDVeQa3oRNJ0jg75wy5NJu/blaaP4OFzJd5/awYRBWYzo2yNp9+oIK6evdLsEERHxON8ErhKuaPO5tbaOcnuM8cFLElhRaqvsPS6h14teoytgAtyQcQXPnP0Ls3+6mfVLrqNbhnf/VRuXk9jvSkREOp/O0Z3TgjJbhcVqwnwCdTOZXJ9+OVX2BPc/rflcIiLSuXm326GRGdwNtG0T6/oFTzvDCvP1Ct5aDLR/E+vmDAz2YWJ4BEXF+ynZlsOI4OCYq9WnutD6kPOnNrEWEZE28k0PVz4ryGdFm84tD1fTw3Qj02QkuKrUdel7RVz6XlHS7zMxbRT9AzlsqNlOZfh40u+XDKv3rWb1vtVulyEiIh7mm8DVHmXhanJNL7fL8KWAMVyffjnppLOuZjMnz9a6XZKIiEiH6/SB67Q9y0lOkxPIcrsU3+pmMrkh/XKq7QkW/XEH1mq/RRER6Vw6feAqD1cDkKvAlVQDgrlcnjaSp7Yc5o5FWp9LREQ6l04fuMqsE7i04GnyXRYcyYBALhtrd7Dno2MXvB8K4clJ9SIiIi3p9IGrPFxFlulOhkl3uxTfq5/PlUE6//D4Zk5oPpeIiHQSvlkW4ggT23ReWbiagYHcBFeT+iqyx7py366mCzdkXMELZRv5v3/YTq/dl2OMcaWWeOX1znO7BBER8TjfBK7lvNbqc07ZM5zmbKecMP/nW91b5qB/IIdv3jyKB1/Yy+S0HEanDXWtlnismrHK7RJERMTjOvWQYpkmzLvmH6aO4FMjc3mzdicV4Qvnc4mIiPhJJw9cVRigt1Hg6miBgOGhL11OZmR9rnO2xu2SREREksY3gStEFiFaF5zKbTW9TA/STDBJVaWuL63M40sr3Z2blHNRF27IuJLj9hTra7an7PpcE1ZMYMKKCW6XISIiHuabwNVa1lrKwtWdcv5WKukX6M0VaaM4GC7hnboP3C5HREQkKTpt4DrJac5yjlwNJ7puQvBSBgf68FbtroaFaEVERPzEN08pttYnE+a1h6LbjDFMSb+cZ87+hXU1m/nu/VMa1kXTQqgiIuIHnTZwlYerCWDINj3cLqVTahykMk0GN2RcwZ/ObeSNmm1MTb8y5dfnEhERiVenHVIss9Vkmx4EO+GE+VTVL9Cb/LTRvB/+iD1177tdjoiISMJ0ysBlraU8XEWOhhNTzrjgJQwO9OXt2l2UhavcLkdERCQhfDOkuIaH4m573J7iHLWdesL82wVL3C4hJmMMn0qfyDNnX2ddzWaqT3+KrK7u7nO5ePJiV+8vIiLe55vAVcyCuNuWWWfCfGdeEuK9EXPcLqFJXSLzuZ47t4H7ntzK/3dHvqvzuQpHFbp2
bxER8QffBK7WKA9XESBAL02YT1l9A9kUpI3h+Z27efSNg3zw4sUN7+nJRRER8RrfzOHK51HyeTSutmXhanqbngSNbz5+q12yfxWX7E/tTZnHBi/m5rH9+OFzuznq4nyuor1FFO0tcu3+IiLifb5JHDP4BjP4RovtrLWU2+pOv2H1VZvu56pN97tdRrOMMTw4eyL9emay7txmztpzrtSxdMNSlm5Y6sq9RUTEH3wTuOJVbU9SSx05nXjCvJf89EfpXH7ySk5zhtdrtqXsfosiIiLN6XSBq9w6Q1NaYd47cgO9KEjL48Pwx+yqO+B2OSIiIq3W6QJXWbiaNIJkme5ulyKtkBcczrBAfzbV7mHzB5VulyMiItIqnS5wlUcmzAc68YR5LzLGcG36ZXQ3mXz98c1UnnRnPpeIiEhbdKrUEbZhym11p15/y8u6mHSmpudTduIc3yraSjis+VwiIuINnSpwVdsT1BHu9E8oelluIIvvTc/j5T2lLP/Le26XIyIiEhffLHwaorrFNmVhp02u0YT5J+budruENvvKpGG8+V4FDzz3DltfyuZXS3sn9X7b521P6vVFRMT/OlUPV7mtJp00emrCvKcZY/jhrAn0MN148dzbFL9f4XZJIiIizepUgassXE1OoKer+/JJYvTMTOfWjGvoarrwpV+9xcJFZdryR0REUpZvhhQXcj0Ay3kt5vt1NkyFPUZeYHgHVpW6bn5+FgB/vnW1y5W0Xn2w6m668pmMSfz53Fu8WPM2N3IloVC/89okwpw1zkbfq2ak9lZIIiKSunwTuAaytdn3q+xxwpow36B35S63S0iIbiazIXS9XFPM9VzOxcGBCb3H7grvzncTEZHU0GmGFOsnzGtLH//pYjK4JeMa+ppsXq3Zwr7aD90uSURE5Dy+6eFqSbmtIoN0ephubpciSZBh0rk542perinmjdptzPleLWPTLgYSO7woIiLSFp2qhys3kKUJ8z6WZoLclJ7P0EA/3qrdxbba/W6XJCIiAnSSwFVr66i0xzWc2AkETZCp6VdySWAQm2vfobhmD9ZqRXoREXFXpwhclfY4FqsJ851EwAT4VPpERgWHsr3uXULP7NQ2QCIi4irfzOEqZl6T75WHqwDICWiF+XrvXlLodglJZYxhctp40gmyYsMBTp6ro9uOCQ2blrdmXteskbOSU6SIiHQavglca/h5k++V2WoyyaA7mR1YUWrbdPVSt0tIOmMMBWl53HJjOj99cS/DA3V8Kv1ygqZ1Hbuha0PJKVBERDoN3wSu5jgrzGvCfGdkjOGeT4+kW0aQZc/upramjqnpVwJBt0sTEZFOxDdzuAawhQFsueB4ja2l2h4nVxPmz5NdsZPsip1ul9EhQiE4/PIlTE4bz6FwKS/WvM2Js7Vxn7+zfCc7yzvHdyUiIsnhm8B1F1O5i6kXHK+wx7BAruZvneeWF2Zzywuz3S6jQ41OG8an0ifycbiCr/zmTapP1cR13ty1c5m7dm6SqxMRET/z/ZBief0K83pCUYBLg4NJI403Dm/h9l9v5LKKq8k0XbQ4qoiIJFWzPVzGmExjzFpjzFZjzO9MjElQsdo0cewqY8whY8zrkdfo5H2sT5SFq+lGF7oZTZgXx7Bgf349r4D3yk7w3LkNnLRn3C5JRER8rqUhxTuAQ9baiUA2cHOcbWIdywZ+Za2dEnm9k6gP0ZxyW6XlIOQCN4zqw4oFV3PKnuW5c+v51uJThELaBkhERJKjpcA1Dfhz5OeXgRvjbBPrWDYwyxjzljFmdazeskQ7Z2uotie14KnEdM0lOdyacQ01tpbnzm2gKnzC7ZJERMSnWgpcOUB15OdjQO8428Q6th9YZK29GhgA3BDrhsaYhcaYTcaYTUePHo33c8RUHj7mFKgnFKUJuYFefCZjEmEsfzq3oWHOn4iISCK1FLjKgPq0khX5PZ42sY4dBF6MHDsI9I11Q2vtcmttgbW2oE+fPi1/gmaUW2eFefVwSXOy
Az35bMYkggR4/txGNn9Q6XZJIiLiMy0FrpeAWyI/TwNeibNNrGPfBOYaYwLAeGBH28u+0COs4xHW9pKTIgAAHulJREFUnXesLFxNd7qSabok8la+8MItT/LCLU+6XUbKyApcxGe7TKaLyeCO/3iT9e9+8v8tVk5fycrpK12sTkREvK6lwPU4MMgYsw2oAN41xjzYQpuXmjj2S2AB8CbwB2vtrsR9DCjhCkq44rxj5bZavVtNqOw9jsre49wuwzWxJshfZLrx2YzJDM7uyoJH3+blPR8DMC5nHONyOu93JSIi7dfsOlzW2rPA9EaHvx1Hm1jHSiDGyqRJctbWcNyeYmRwSEfdUnygm8lk5cLJfPU/32Thb4v52dwr+PxlA9wuS0REPM43K83P4G5mcHfD7/WTn3ONloSIpeCtxRS8tdjtMlJS7+4Z/P7OSfS2vfj67zdzw8/vJbQ+5HZZIiLiYb4JXPmsIJ8VDb+XRSbMa4X52C59r4hL3ytyu4yUFArB//u3dG7OuJr+gVwqsv7E6n2r3S5LREQ8zDeBq7HycDU9TDe6mHS3SxGPSjdp3JRe0PD7w6/sx1rrYkUiIuJVvg1cZeFqrb8l7ZZmgg0///j5d1jw2Nt8VK2tgEREpHU8u3l1c1uwnLZnOclp8gLDO6oc6QSuSRvLxvf2cMtPXyX0hXF88YpBdMCGCSIi4gO+7OFqmDCv+VuSQHlpF/PcPdczsl8PvrlqKwt/V8zR42fdLktERDzAn4HLOoGrt+npciXiNxfndmfVXZMpSMvjpV1HmbLsVdZuO+J2WSIikuI8O6TY2BEmNvxcFq4my3QnQxPmm1SRPdbtEjyj19m8hp+doWzD+LRLGBzow+s1W/n677fw3PaP6LF3PJkmI6qdiIiIwzeBazmvNfxcHq6ifyDXxWpS359v1TIH8brpyKqYx3sFevC5jGvpN/U9HnpxL8G6cianT2BYsH8HVygiIqnOd0OKp+wZTnFW87ekQwRMgH+8cQTPfH0K3Uwmr9QU89q5v1J9qsbt0kREJIX4LnCVRSbMa8FT6Uh5A3ry+YzrmBgcyYHwEW556FVeeafU7bJERCRF+CZwhcj6/9u78zi5yjrf459f7b13p5ukt5Ckk5BANpCwDIKyjIASB5VFBke5Ovc640tRr47jDg06d7wvl+t2XRhGLzqoCCjKvoiIYUkChKQhbFlJZ+8mXemkl9qe+8epXtPp7pDuVJ3K982rXufUqVNVT/fJqf7ynKd+D81U0J7pwEA1uMbw/t+cyPt/c+LYOwp3zlrEnbMWjblf0AKcEj6BZZG3UlEU5sM/X8Xn71hLZ496u0REjnUFE7j6tLk4lVY2pGClyNFUHajg7mvP5mPnzub2Z7dy8Xf/yhPr23LdLBERyaGCC1ztmbguJ8pR1dx88LcSo6Egn794Pnd87Cyi4QAfuHkFX7mrhQO9qRH3FxGRwlYw31Ls00OCGl1OlBzrC1TNzVXc98lz+OaDr/CzJzbx+KttnJRZTG2gOqftExGRo6vgergAqgOVuW6CSL9YOMhXl53EbR/9G8zggcTTrEyuozuRznXTRETkKCm4wGUYVVaW62aIHOT0WVO4/1PnMD84g3XpTVzy/b/y7Ja9uW6WiIgcBQV3SbFKA+YlT3mXGUOcGV7I8YFanmhby+U/fpIFwSbuaD6BWFj/bkVEClXBBK4/uu+yKrWOGl1OHJdVS2/IdRN845S26yb8NeuDNVwaOIdVqZd4Ib2RM6/bzdnhk/nhjaOPPxwYGzbhTRIRkUlUMIHrMXclv0s/xlkhDZgfj41zrsx1E3yjqfOKSXndiIV5a3gxMwK1PJls4d7EEzQ9NJtPnD+XSKjgrvaLiBzTCiZwtTtVmJf8M56eqMbgVC4NvI2VyRf5/qPreeSl3Xz7yiWcWFc+6e0TEZGjo2D+N3qpu4Wrg49SqQHz49K0/rc0rR95UmYZamPZ7Wwsu31S3yNqYc6JnMz54aVs3NHL
Jd9bztfuWcfuzp5JfV8RETk6CqaH69rQNwBo5ts5bok/nPbM9YAuLY7H6pobgcm7tDjY8cFpTA1U8UzqJX62fDP/b/kWPvTW4/nnt89mWnls0t9fREQmR0EELuccWK5bITIxYhbh7PASFgfnsDa9nl88tYVbV7zO3582nS43mxIrynUTRUTkMBVE4Iq7AwpcUnDKAyWcHVjCz66dy48eW8+tK14nk9nK3GAj2zrm0FCp4CUi4hcFEbjaXUeumyAyaX72g2JiLOY94TmsTW3gtfRWzv3mVi4/dTqJ52dTFigGVCpCRCSfFUTgasvEQTUjpcCVWjFnhRexODSHolM2cNuqrSTTW5kTbGRxcA5QnOsmiojIIRRE4GrPxHPdBJGjptSK4PmFXBqaTQsbeDW9lfXpVgK3N/Dx8+Yws6Yk100UEZFhfB+4Mi7DG25frpshctSVWBFnhheyODSHltQG/rjmdX63ehuXnlzPJ86bQ9NxpbluooiIZPk+cMXdAVKk+WD6RWYHG3PdHN+47aqXct0E37hsU0uumzCqYotxRngBt3x2Njf9ZSP/tWILd63ext8tqecT589lzlQFLxGRXPN94GrLeAPmq01zKMqx7UffjgEncWlgNi8GNnLP81u46/ntzArUc9On5jB3mooCi4jkiu8rzbe7OCGCVJjGrYgAFFmUpeETuTx6HguDs9ma2cWF332cj//qOV7eqcvvIiK5UAA9XHGqrYJ/srcDcBOP57hF/vCOBy8D4OGL7sxxS/Lfn+q9avwXbPfXVEgxi7I0PJ+FoSbWpTby0Not3Lt2B+9cWMu158/lpHrN1SgicrT4OnClswPmTwzOpJ41uW6Or0zZuy7XTfCNjqi/x7vFLMJbwvNZEGpiXWoTy1/bzP0v7OTCk6bxyQvmsrBBE76LiEw2X19S7HCdZMhQE9AfDJGxRC3CKeF5LP/C+Xz6b+fy9MZ2lv1gOf/9llU8u+UNb4osERGZFL7u4eqrv1VtClwi41VRFObTf3sCHzl7Frc8sZmbl2/ikR8/RUNlEe9cWMu7Ftfxh/+sxMybL0sV7EVEjpyvA1ebixMhRJmpwrbI4SqPhbn2grl8+OxZPPDCTu5v2cEvntrCzcs3UUKMGcE6ZgZryWSqCAQ0WamIyJHwdeBqz3RQHRj4P3ERGVtfj1XfsjQa4vJTG7n81Ea+dH2SrYFdbE7v4OX0FtalN7HwSzFmBGuZGaxjqlVhZur1EhE5TL4NXCmXZq/rZEGgKddNEfGlkUJTxMLMDjYyO9hIwiXZmtnN5vQOXkm/zkvpzRQTZUawjpWb6lg6Qz1fIiLj5dvAtdd1ksFRnR0w/yzX5LhF/rKh6YpcN8E3Zu67LNdNyAkvfDUwO9hAwiVpHRS+rvzpZqaWRbl4YS3vWlTHaTOnEAwMHfOlXjARkQG+DVzt2QrzNQGvwvzdfD+XzfGdZ06/MddN8I1T25tz3YSci1iYpmADTcEGki7FGe/bzX1rd3DrU1v5xVNbOK4sysULvPCVcVMI6DK/iMgQvg1cbS5OjAglxHLdFJFjSthCPPf7emqp56poitbMbkpm7uD2Z7fyy6e3ECPCjGAtT66v4/RZUwgFfV19RkRkQvg2cLVn4lQHKvoHzNexGoAdnJLLZvlG1RsvArB3yoIctyT/7Y14v6uqhH5Xw4UtxKxgPc0fqKcrkeLPL+/hm7/ZwYb0Nq6++XWqSyJcuKCWSxbVcWaTwpeIHLt8Gbi6E2k6XCfHB6b1b/snzgWgmXiOWuUvFz50OQC3XeXvKupHw6MNVwFw2aaWHLckvxVHQlyyuI5Vv6sj5dLemK/uHdy+chu/Xvk6USLMCE5jRqCOqYEqwuZ9/Gisl4gcC3wZuNbtiOOA6uz4LRHJLyELMjNYx8ygF762ZXazOb2TjentvJreigEVVsZxgQr+6+lKTp5eybzaMsLqARORAuXLwLW21evF0pQ+IvkvZEFmBOuYkQ1fOzPt7Ml00OY6
eD29i6/c1QpAJBRgQX05Sxor2bCqghqr5NvNJSo9ISIFwbeBq4goxaYB8yL5YLyXBUMWpDE4lcbgVACcc+x33bS5Dk44s4M1W+Pctmor3cnNADxyY4gljZUsbqxgyXSvJ2xa+Zs/71WyQkRyxaeBq6O/HISI+JeZUWbFlFHMly+pByCVzvDZG/Z7vWDJOOs2dPDE+o04vMm1p5VHWdJYyZLplSxprGRRYwUVReHDel8FLxE52nwXuDp7kmxsO8DJwYZcN0VEJtBA+AlQFSinKlDOCdktKZfmvR/Zx9rWDtZs7WBta5yH1u3qf25TTUl/L9iS6ZWcVFdOLBw8yj+BiMih+S5wvbh9H85BtWn8lsixImRB7v55FVDFd5u9bV+8Pkl7poM2F6d6agdPbmjnrue3e/sHjPl1ZSxurOTkxkoWT69g7tQyQOPBRCQ3fBe4WrID5quHDZj/KY/loDX+9dCFd+S6Cb5x/rbf5LoJMoKohakPHkc9x8FGaAD++Ys9rBnUC3b3mu38asXrABRHgpQlK6gJVFATqKTGKnGuqL+W32gGX3rUZUgReTN8F7jWbovTUFlEUU90yHYVPD08Kng6fip4ml9GCzy1FTFqK2q5aEEtAJmMY1P7geylyDj3PtXBS+ktZNKbAPjz1yPepcjGSpZMr2BxYyU1pdFDv8EI7VAAE5Hx8F3gamntYFFDBWzIdUtEJN8cHH4MKM3eGrkkCmmXocN1sifTQXtPB8+/Gucvr7yWHZIPDZVFnDx9IIAtaqjAhx+VIpJnfPUpEu9Ksrm9iyuWTmfPsMD1bj4JaBLr8Vq68jpAk1iPx7PVzYAmsS4UQQtQbRXZYQkzAEi6FO0uTlumg2nHx1mztYN7W3YAEDAop7T/MmRLq1ekFVSkVUTGz1eBq2WbN35rSWMljwx77FRuARS4xmv2xtsBBa7x2Fx+J6DAVcjCFqLWqqkNVNN8tbetfX8va1vjrGnt4I5HO2hN72Y9rbz7hxAgwBQrpyZQwe+e874ZOataRVpF5NB8FbjWbusAYFFDxUGBS0RkIgxclozS3DyV8+ZPpWP5QJHWdhf3aoRlOlifbuUzv90CQJgQS5sqmF9XxvzaMubVlnPCtFKKI776mBWRSeKrT4KW1jgzqoupKD68IociIm/G4DFhg4u0zgzWAZBxjrjbT1t2qqLXNsVZtXErKdLZ58DxU4qZN62MebXebX5tGTOrSwhp3kiRY4qvAtfa1jinHK8K8yKSHwJmVFkZVYEy5jId8HrCOl0Xe10nHa6T+vpOXt65j4fX7eofmB8JBZhzXGl/COsLYrXlsXGVqRAR//FN4Grf38u2jm6uOWtGrpsiInJIZka5lVBOCTOopfkD3vavXJ8m7vbT4To56W86eXlnJ09taOf3q7f1P7c8FhoUwsqZX1vGCdPKDnvqIhHJP74JXH0D5hc1qIdLRPyj77JkyILetyOpoHclzMK79UYTvOeaTl7d5YWwV3Z28ofV2+nsfb3/NeoqYsyrLaNtvdeb9sWPlTN7agnRkKYvEvEL/wSu1jhmsLChfMTHt7PkKLfI396oOinXTfCNyt4Tc90EKWBRi3BGUzVnNFX3b3PO8bnmHvZm9tHh9jN91j5e3tnJq+k2MmnHX78PhlFqRSxqKqKxqojXni+m1IootWK++pkippXH+NqNA5cnhxdqVcFWkaPLN4FrTWucppoSymIjd63fxONHuUX+9vBFd+a6Cb5xwfbf5roJcgzxgpAXpkqDRUxnGs1XeY9dd32GuDtAh+tkb6aTTtdFT7KLx17Zw+5Ub/9r3P+NgUDWd/veI8U0VhWxM+OFsnQmRnACy1goyImMzjeBq2VbB2fNrsl1M0REJtxYIaXv8YAFvEH6lDGr72riblgMpKJpDrhu9vffuvqXrZk9/J9Heoe85p1fMkosRqkVc84pRTRWeYGssaqIxinF1JZPbCATOdb5InDt2tfDrn292Sk2RERkuJAFqbBSKigd
8fEvfDnNjngP//69rmGBrJvHX9vDrn1DA5nhBbJFs7wgVldZRHVJhKqSiLcsjlBd6i0joYESF5roW2RkvghcLa3egPnFjYcOXM1UZJfxo9Imv3v/b7xxSbdd9VKOW5L/7py1CIDLNrXkuCUib943/i0IlFAfLDn4wYTXQ/aP1/bQureL1r3d/PoPXiBLpL1AtruzF+cOfipAWTQEiQgxIkQtQsy89Z/+JcKUkoFbdUmUqpIwpdGQyl/IMccXgWvttjgBg5PqRx4wLyIiRyZkQWbVlDCrxgtkr9yXfWAXLAIykQyf+lySNw4kaD+QYO+BBLfclqCXBD2pBD3mrXe5Ht7I7KOHBC/cnxnxvSLBAFOyvWUdu7xwFrMI7zx/oAetNBqiOBKkOBKiJDqwLAoH8zasqXdPRuOLwNXS2sHcqWUUR0L6RywiMklG+3wNWIAffCsKRPu3zR/lL4hzjhRpelyCHhL0uF56SfK2d/TSfiDBo8sT7N+fIOUStNFFTybBdx5OjdlGMygOBymOhigZFMiKIkPvF/fdj4b40wNBQhbimqu97cWR4KB9QhRHg4SDB18WHe/fG/1dkvHI+8DlnKNlW5xz503NdVNERGSczIwwIcIWoozi/u0ffZu37F1x8HPSLkMvCXpdgiQpki5NihRJ0qT61l2a005PcSCRZsUzKTpJUzkrRbw7yY6ObroSaQ4kUnQl0iRSQ3vYHr/l0O2NBAMUR4OUREIc6A0SIsSr/xHs72V7+QVvWxgvvL3nkoFwty0dImze4yGChC1EVyK/e+Pk6Mv7wLU93kPb/gRLRhm/JSIi/jBab1DQAhQTo9hio75G6hmvn+1tkeyGbSPsFIJMMOOFtcHhzaW58movsHX1Diy7kgP3V61OkSLN+o1e3EuSIuXS3jI7T+azfxj957ztOm/Z18vW1/tWGgtRVxGjrqKIhsoY9ZVF2fUiyos0tq2Q5X3gamntAGBRoyrMi4jI+AUsQJQAUcIwKMf89baB9cGXDw0oBc6LcEjOOdJkhoWw4b1xA8ulp6fpSqTpSqR4bm2adpdkZ1UH98V3kEwP/RZCiCAlVkSJFVFqMZadX0R9ZRH1lTEaKouorYhpdgEfy/vAtbY1TihgzK8ty3VTRESkwBzu+CszI0SQEMEhIe5Qvrps0Hu9nF3+K2Qyji/c0MsB180B19NfQ63L9bDfdfNGJs53Hk4c9Ho1pVEaKr0esvrKIlpWxPpD2vWfi1FTEiWg+ml5Ke8DV8u2OPNqy4iFR0/1d/Pdo9SiwrBq6Q25boJvnNJ2Xa6bICI+NVKg65tNoNi8y6fHHeK5KZema3AYo4f93d3s6ermQGI/j7+2h65Uun//e//NG4tWWxGjvjJGfUURU0oilMZClMXClEW9S5ql0RBlMe9WGg1TFvO+SKDLmZMr7wPX2tY471pUO+Z+z/Lho9CawrFxzpW5boJvNHVekesmiMgxKGRByq2EckaonbYPnDkS0eSQHrIDrpum6T1s7+jm6Y3t7I4n+8edjSZgUBINURb1wtnBwWwgnJXGRt6vKDL25c5D1XIbutM49gFKYyFfzYaQ14ErkcqwrzvJogaN3xIRERnMzIhmi81OYVCdyldgCrAQIAYZ5315IEmSpEuRIDVkmSRJwqVIJlMsWJCisyfJ2pdSJF2CBF1ES1Ps70nRnRw7uB1tUcJesd3s7+Gtpw6dDWFKSYQ7fxUhmq31FsLryctFKY+8Dlx9B3e0CvN9TuXngHq6xqtpvTchs3q6xrax7HZAPV0i4k+H+vLASJqzH3NDAkkSCEIm4H1ZIOFSXtkOUiSc9y3Oi5al6E54f7MffHCsFg1txMUXe8sHHjjUHgdzOBIu5RXezdZ62++6+MOqDnpI4A7RTRYgQIwIK7/nhbFtG70gFrUIly2LMKV4YGaEqpIwVcWRITXajkR+B65EmuJQgBOmjT1g/t18GlDgGq/TnrkeUOAaj9U1NwIKXCJS+MYqftvXozbc
a/cPrC84zGSx9ZE397xDcc6RJNUfxHqdF8oGh7P6Sm/GhDbnFd1NkuL5u0Z+vQghqitCZJwj47zXzzi8+xmH61sf41JoXgeurkSaU+vKh0yMKiIiInIoZkaEMBELjzz+DWi+Jrts9pYZl+kPZ++/JsEb2emr7rw3SY/rJbU/jWX/O20pBMwImPdeK1dYf4/cy6O0K68DV3cyzeIGFTwVERGRiTO8Jy8wqOjuQ78c2H7ySClpzdC7p4cH1u/j0PK66yjjHItUYV5ERER8btTAZWYxM7vHzNaY2S9thCIdI+0z3m3jaeB4BsyLiIiI5LOxerj+AWh1zi0BqoB3jHOf8W4blRnMOa4U8Lr/NCO7iIiI+NFYget84OHs+qPAeePcZ7zbRlUUDhKaoK9jioiIiOTKWIPmq4F4dn0fMG+c+4x320HM7KPAR7N3e83shTHaCMDARDX+qTqbS/2/rxvy4vdVA7TluhFjeUH/to6EL46xHBEd48KnYzy2GYd6YKzA1Qb0DaKqYORf9Ej7lI5z20GcczcBNwGY2TPOuaVjtFF8Tse58OkYFz4d48KnY3xkxrpe9yfgwuz6+cCfx7nPeLeJiIiIFLyxAtetQIOZrQXeADaY2bfG2OdPh7FNREREpOCNeknROdcLLBu2+V/Gsc94t43lpsPcX/xJx7nw6RgXPh3jwqdjfATMuTEm/xERERGRI6KaCyIiIiKTLC8D15utSi/5z8xOM7NWM1uevS3RsS4cZhY2s7uz6xM244Tkl2HHefg5PU/H2d/M7BYze9rM/mhmpTqPJ0ZeBi7eRFV68Y0q4MfOubOdc2cDp6FjXRDMrAh4loFjOCEzTkh+GeE4DzmnnXOvoOPsW2Z2NhByzp0JlAMfQefxhMjXwHXYVenFN6qAy8xspZndCVyAjnVBcM51O+cWA63ZTRMy44TklxGO85BzOtvboePsX7uA72XXA0AzOo8nRL4GruFV6afksC0ysdYDX3XOnQ7UAe9Dx7pQjXQe69wuPMPP6bej4+xbzrnXnHMrzey9QAZYjc7jCZGvgWs8Fe7FnzYDjwxaz6BjXahGOo91bheezQw9p6ei4+xrZvZ3wCeBdwM70Xk8IfI1cKkqfeH6DHCVmQWAhcBn0bEuVJpx4tgw/Jx+AR1n3zKzWuBzwDLnXCc6jydMvgYuVaUvXD8EPgysAH4P/Cc61oVKM04cG4ac0865deg4+9k1eJeGHzSz5UAYnccTQoVPRURERCZZvvZwiYiIiBQMBS4RERGRSabAJSIiIjLJFLhEREREJpkCl4jkJTObn52zz8wsOOyx0CGeU2Fm4VFec8pI6yIik02BS0Ty1YeAzwNNwH1mdp+Z7TKz+4DbAczsSjN7y6DnfAO4eaQXM7MTgL+YWY2ZRYAnzey4YfvMMLMl2fURQ132sS+a2Tlm9r/M7PNmVmVmvzCzhiP5gUWkcClwiUi++hqQds5tAK5wzr0LWJVdfjq7TwvwHQAzmwucCoTM7O2DXyjbm3UW8ABwKfB1YCtwSTaI9ekBvp1df8jMHhl022Nmoexcgc8BZUAK6MKb3mQRUGdmp2WLR4qI9FMdLhHJO2YWcM5lsutTgF875y4ys3uAfwAedM6dkX28GAgCjwH/A3gNb6qZ24CbnHP7s6Hq+3g9YO/I7v8AcDHwinPu54PeezqQBGYD6ezmOPBN59wyM5sN/AQ4A2/ak614lbYvyC7/HrjGObdyMn43IuJPh+wyFxHJoQ+a2SeAh5xzXzazfWY2M/vYlcDPzexDeJcdfw/8I3AvcLZz7jkzuxD4D6AD+BlegFoMfAWoBQxYildR+2UAMzsl+9o1eL1rlXiBy4DB/2e6Ba+y9i5gNzAPqAfuAH4LNClsichwClwiknecc7eY2Yt4k+cC/AtewAF4NLveDWzE67F6D14o+rqZzQC+BXzYObc/+5wQsBxv3r/3Zu/fDlyBF6hwzq0GVpvZHc65183sk3ihLAqcN6h51Xjz
BUaAe/AmYL8Ibx7Bp4BtE/ebEJFCocAlIvnMmdnVeD1YSeAteHP3RfFCVRxw2YDUCJyJNz/n/x4UtgD2Ao/jBbOleONXk9nboXqjapxz55rZT4Ztvwz4IHA8cC7Qlb3UeHP29Vcf2Y8sIoVIgUtE8tkMYKVz7gIAM7vHObes70EzOxuImVkz3rcT1znn3jv4BczsLOA6vAHuAA14vVpV2fvvNrNfOOduzd4vz4anYjN7DDgB+J99r+ec+1H2G4wn44WrfdmHbsUbNzZrIn5wESksClwikq+m432z8Kuj7BMEPgZcnb3f36tlZv8X+IJz7km8wfF92z8EBAcPlB/02PF4g+G/BHwm28P1RbwescG1wH4A/Cvw34CUmf0Z+DjQinf58Z7D+klFpOApcIlIvloDvNM51zpoW+mwfeYBP3HO3Zv9tuJZZvYIUAy84ZzrHOF1Qxz6s68ceJ9z7hkz+wqAc+7fzexGIAFgZmcCX8YbE3Y63mD8u/FC2lPAvWa2xzm34vB/ZBEpVCoLISK+ZmZB51x67D0n7P0ieOPGkoO2FTvnurLr/SUtRET6KHCJiIiITDJVmhcRERGZZApcIiIiIpNMgUtERERkkilwiYiIiEwyBS4RERGRSfb/AQlGQKc4SXXoAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 720x576 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Distribution of per-comment word counts with dashed markers for the mode,\n",
    "# mean and median; the x-axis is clipped at the 95th percentile of the lengths.\n",
    "lengths = train['length']\n",
    "ymax = 0.025\n",
    "fig, ax = plt.subplots(figsize=(10, 8))\n",
    "sn.distplot(lengths, bins=lengths.max(), ax=ax,\n",
    "            hist_kws={\"alpha\": 0.5, \"color\": \"blue\"})\n",
    "ax.set_xlim(left=0, right=np.percentile(lengths, 95))\n",
    "ax.set_xlabel('评论单词数')\n",
    "ax.set_ylim(0, ymax)\n",
    "# One dashed vertical line per statistic, in the original drawing order so the\n",
    "# colour cycle and legend order are unchanged.\n",
    "for stat_name, stat_value in (('mode', mode), ('mean', mean), ('median', median)):\n",
    "    ax.plot([stat_value, stat_value], [0, ymax], '--', linewidth=2,\n",
    "            label=f'{stat_name} = {stat_value:.2f}')\n",
    "ax.set_title('评论的单词个数分布', fontsize=16)\n",
    "ax.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>comment_text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>00001cee341fdb12</td>\n",
       "      <td>Yo bitch Ja Rule is more succesful then you'll...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0000247867823ef7</td>\n",
       "      <td>== From RfC == \\n\\n The title is fine as it is...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>00013b17ad220c46</td>\n",
       "      <td>\" \\n\\n == Sources == \\n\\n * Zawe Ashton on Lap...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>00017563c3f7919a</td>\n",
       "      <td>:If you have a look back at the source, the in...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>00017695ad8997eb</td>\n",
       "      <td>I don't anonymously edit articles at all.</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 id                                       comment_text\n",
       "0  00001cee341fdb12  Yo bitch Ja Rule is more succesful then you'll...\n",
       "1  0000247867823ef7  == From RfC == \\n\\n The title is fine as it is...\n",
       "2  00013b17ad220c46  \" \\n\\n == Sources == \\n\\n * Zawe Ashton on Lap...\n",
       "3  00017563c3f7919a  :If you have a look back at the source, the in...\n",
       "4  00017695ad8997eb          I don't anonymously edit articles at all."
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of the test split.\n",
    "test.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.数据处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label columns used as the training targets.\n",
    "list_classes = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult',\n",
    "                'identity_hate']\n",
    "y_train = train[list_classes].values\n",
    "\n",
    "# Raw comment strings for both splits; missing comments become \"_na_\".\n",
    "list_sentences_train, list_sentences_test = (\n",
    "    split[\"comment_text\"].fillna(\"_na_\").values for split in (train, test)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same values as the configuration cell near the top of the notebook.\n",
    "# embed_size matches the 50-d GloVe file; max_features is passed to\n",
    "# Tokenizer(num_words=...).\n",
    "embed_size = 50\n",
    "max_features = 20000\n",
    "# Number of token ids kept per comment. NOTE(review): pad_sequences is called\n",
    "# without `truncating`, and X_train[10] ends with the comment's closing tokens,\n",
    "# so long comments appear to keep their LAST 100 tokens, not the first 100.\n",
    "maxlen = 100"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "将评论分词并向量化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit the vocabulary on the training comments only, then encode both splits\n",
    "# with the same fitted tokenizer.\n",
    "tokenizer = Tokenizer(num_words=max_features)\n",
    "tokenizer.fit_on_texts(list(list_sentences_train))\n",
    "list_tokenized_train, list_tokenized_test = map(\n",
    "    tokenizer.texts_to_sequences,\n",
    "    (list_sentences_train, list_sentences_test),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# padding/truncating are spelled out with their Keras defaults ('pre'): short\n",
    "# comments are left-padded with zeros, long ones keep their last `maxlen` ids.\n",
    "pad_kwargs = dict(maxlen=maxlen, padding='pre', truncating='pre')\n",
    "X_train = pad_sequences(list_tokenized_train, **pad_kwargs)\n",
    "X_test = pad_sequences(list_tokenized_test, **pad_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Vocabulary mapping exposed by the fitted tokenizer (presumably\n",
    "# word -> integer id; confirm against the Keras Tokenizer docs).\n",
    "word_index = tokenizer.word_index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
       "          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
       "          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
       "          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
       "          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
       "          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
       "          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,\n",
       "          0,    0,    0,    0,    0,    0,   52, 2635,   13,  555, 3809,\n",
       "         73, 4556, 2706,   21,   94,   38,  803, 2679,  992,  589, 8377,\n",
       "        182])"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Encoded form of a short comment: the leading zeros are padding.\n",
    "X_train[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"D'aww! He matches this background colour I'm seemingly stuck with. Thanks.  (talk) 21:51, January 11, 2016 (UTC)\""
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Raw text of row 1, for comparison with the token ids in X_train[1].\n",
    "train['comment_text'][1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([   9,    6,   40,   81,   22,    6,   18,  689,   61, 1401,  391,\n",
       "       1347,    9,    6,   18, 1972,   92,  119,    4,  870,   88,  162,\n",
       "          6,   40,  139,    5,  163,    3, 1401,    6,   18,  689,   32,\n",
       "        423,   13,  164, 1219,    4, 4417,  340,   85,   16,  136,   48,\n",
       "        714,  148,   51,   18,   55,  870,   17,  948,   15,  398,   12,\n",
       "        254,  106,   22,    1,  118,    8, 1544,  177,    5,  280,  191,\n",
       "       1087,  500,   28,  269,   81,   83,    1,  118,   44,   16,  136,\n",
       "       1784,  773,  148,   22,    6,   18,   54,  235,   45,  209,   88,\n",
       "         34,    1,  369,  246,  235,   27,  127,    6,   38,  248,  490,\n",
       "        248])"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Encoded form of a long comment: every position holds a token id (no padding).\n",
    "X_train[10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\"\\nFair use rationale for Image:Wonju.jpg\\n\\nThanks for uploading Image:Wonju.jpg. I notice the image page specifies that the image is being used under fair use but there is no explanation or rationale as to why its use in Wikipedia articles constitutes fair use. In addition to the boilerplate fair use template, you must also write out on the image description page a specific explanation or rationale for why using this image in each article is consistent with fair use.\\n\\nPlease go to the image description page and edit it to include a fair use rationale.\\n\\nIf you have uploaded other fair use media, consider checking that you have specified the fair use rationale on those pages too. You can find a list of \\'image\\' pages you have edited by clicking on the \"\"my contributions\"\" link (it is located at the very top of any Wikipedia page when you are logged in), and then selecting \"\"Image\"\" from the dropdown box. Note that any fair use images uploaded after 4 May, 2006, and lacking such an explanation will be deleted one week after they have been uploaded, as described on criteria for speedy deletion. If you have any questions please ask them at the Media copyright questions page. Thank you. (talk • contribs • ) \\nUnspecified source for Image:Wonju.jpg\\n\\nThanks for uploading Image:Wonju.jpg. I noticed that the file\\'s description page currently doesn\\'t specify who created the content, so the copyright status is unclear. If you did not create this file yourself, then you will need to specify the owner of the copyright. If you obtained it from a website, then a link to the website from which it was taken, together with a restatement of that website\\'s terms of use of its content, is usually sufficient information. 
However, if the copyright holder is different from the website\\'s publisher, then their copyright should also be acknowledged.\\n\\nAs well as adding the source, please add a proper copyright licensing tag if the file doesn\\'t have one already. If you created/took the picture, audio, or video then the  tag can be used to release it under the GFDL. If you believe the media meets the criteria at Wikipedia:Fair use, use a tag such as  or one of the other tags listed at Wikipedia:Image copyright tags#Fair use. See Wikipedia:Image copyright tags for the full list of copyright tags that you can use.\\n\\nIf you have uploaded other files, consider checking that you have specified their source and tagged them, too. You can find a list of files you have uploaded by following [ this link]. Unsourced and untagged images may be deleted one week after they have been tagged, as described on criteria for speedy deletion. If the image is copyrighted under a non-free license (per Wikipedia:Fair use) then the image will be deleted 48 hours after . If you have any questions please ask them at the Media copyright questions page. Thank you. (talk • contribs • ) \"'"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train['comment_text'][10]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.使用 Glove 词向量"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_coefs(word, *arr):\n",
    "    return word, np.asarray(arr, dtype='float32')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "embedding_index = dict(get_coefs(*o.strip().split()) for o in\n",
    "                       open(EMBEDDING_FILE, encoding='utf-8'))                                                                                                                                                                                                                                                  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Program\\Anaconda3\\envs\\nlp\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: arrays to stack must be passed as a \"sequence\" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.020940498 0.6441043\n"
     ]
    }
   ],
   "source": [
    "all_embs = np.stack(embedding_index.values())\n",
    "emb_mean, emb_std = all_embs.mean(), all_embs.std()\n",
    "print(emb_mean, emb_std)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "word_index = tokenizer.word_index\n",
    "nb_words = min(max_features, len(word_index))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'the': 1,\n",
       " 'to': 2,\n",
       " 'of': 3,\n",
       " 'and': 4,\n",
       " 'a': 5,\n",
       " 'you': 6,\n",
       " 'i': 7,\n",
       " 'is': 8,\n",
       " 'that': 9,\n",
       " 'in': 10,\n",
       " 'it': 11,\n",
       " 'for': 12,\n",
       " 'this': 13,\n",
       " 'not': 14,\n",
       " 'on': 15,\n",
       " 'be': 16,\n",
       " 'as': 17,\n",
       " 'have': 18,\n",
       " 'are': 19,\n",
       " 'your': 20,\n",
       " 'with': 21,\n",
       " 'if': 22,\n",
       " 'article': 23,\n",
       " 'was': 24,\n",
       " 'or': 25,\n",
       " 'but': 26,\n",
       " 'page': 27,\n",
       " 'wikipedia': 28,\n",
       " 'my': 29,\n",
       " 'an': 30,\n",
       " 'from': 31,\n",
       " 'by': 32,\n",
       " 'do': 33,\n",
       " 'at': 34,\n",
       " 'me': 35,\n",
       " 'about': 36,\n",
       " 'so': 37,\n",
       " 'talk': 38,\n",
       " 'what': 39,\n",
       " 'can': 40,\n",
       " 'there': 41,\n",
       " 'all': 42,\n",
       " 'has': 43,\n",
       " 'will': 44,\n",
       " 'please': 45,\n",
       " 'no': 46,\n",
       " 'would': 47,\n",
       " 'one': 48,\n",
       " 'like': 49,\n",
       " 'just': 50,\n",
       " 'they': 51,\n",
       " 'he': 52,\n",
       " 'which': 53,\n",
       " 'any': 54,\n",
       " 'been': 55,\n",
       " 'should': 56,\n",
       " 'more': 57,\n",
       " 'we': 58,\n",
       " \"don't\": 59,\n",
       " 'some': 60,\n",
       " 'other': 61,\n",
       " 'who': 62,\n",
       " 'here': 63,\n",
       " 'see': 64,\n",
       " 'also': 65,\n",
       " 'his': 66,\n",
       " 'think': 67,\n",
       " 'because': 68,\n",
       " 'know': 69,\n",
       " 'how': 70,\n",
       " 'edit': 71,\n",
       " 'am': 72,\n",
       " \"i'm\": 73,\n",
       " 'people': 74,\n",
       " 'why': 75,\n",
       " 'up': 76,\n",
       " 'only': 77,\n",
       " \"it's\": 78,\n",
       " 'out': 79,\n",
       " 'articles': 80,\n",
       " 'use': 81,\n",
       " 'when': 82,\n",
       " 'then': 83,\n",
       " 'time': 84,\n",
       " 'may': 85,\n",
       " 'were': 86,\n",
       " 'did': 87,\n",
       " 'them': 88,\n",
       " 'now': 89,\n",
       " 'being': 90,\n",
       " 'user': 91,\n",
       " 'their': 92,\n",
       " 'than': 93,\n",
       " 'thanks': 94,\n",
       " 'even': 95,\n",
       " 'get': 96,\n",
       " 'make': 97,\n",
       " 'good': 98,\n",
       " 'had': 99,\n",
       " 'well': 100,\n",
       " 'very': 101,\n",
       " 'information': 102,\n",
       " 'does': 103,\n",
       " 'could': 104,\n",
       " 'want': 105,\n",
       " 'deletion': 106,\n",
       " 'its': 107,\n",
       " 'such': 108,\n",
       " 'sources': 109,\n",
       " 'way': 110,\n",
       " 'name': 111,\n",
       " 'these': 112,\n",
       " 'first': 113,\n",
       " 'wp': 114,\n",
       " 'help': 115,\n",
       " 'pages': 116,\n",
       " 'new': 117,\n",
       " 'image': 118,\n",
       " 'source': 119,\n",
       " 'editing': 120,\n",
       " 'go': 121,\n",
       " 'need': 122,\n",
       " 'section': 123,\n",
       " 'say': 124,\n",
       " 'again': 125,\n",
       " 'edits': 126,\n",
       " 'thank': 127,\n",
       " 'where': 128,\n",
       " 'fuck': 129,\n",
       " 'made': 130,\n",
       " 'many': 131,\n",
       " 'much': 132,\n",
       " 'used': 133,\n",
       " 'really': 134,\n",
       " 'most': 135,\n",
       " 'deleted': 136,\n",
       " 'discussion': 137,\n",
       " 'same': 138,\n",
       " 'find': 139,\n",
       " 'into': 140,\n",
       " 'work': 141,\n",
       " 'those': 142,\n",
       " 'since': 143,\n",
       " \"i've\": 144,\n",
       " 'right': 145,\n",
       " 'point': 146,\n",
       " 'before': 147,\n",
       " 'after': 148,\n",
       " 'add': 149,\n",
       " 'read': 150,\n",
       " 'look': 151,\n",
       " 'over': 152,\n",
       " 'him': 153,\n",
       " 'take': 154,\n",
       " 'two': 155,\n",
       " 'still': 156,\n",
       " 'back': 157,\n",
       " 'wiki': 158,\n",
       " 'someone': 159,\n",
       " 'fact': 160,\n",
       " 'hi': 161,\n",
       " 'too': 162,\n",
       " 'list': 163,\n",
       " 'link': 164,\n",
       " 'own': 165,\n",
       " 'said': 166,\n",
       " 'something': 167,\n",
       " 'going': 168,\n",
       " 'blocked': 169,\n",
       " '1': 170,\n",
       " '2': 171,\n",
       " 'stop': 172,\n",
       " \"you're\": 173,\n",
       " 'content': 174,\n",
       " 'without': 175,\n",
       " 'block': 176,\n",
       " 'under': 177,\n",
       " 'history': 178,\n",
       " 'http': 179,\n",
       " 'our': 180,\n",
       " 'added': 181,\n",
       " 'utc': 182,\n",
       " 'editors': 183,\n",
       " 'another': 184,\n",
       " 'removed': 185,\n",
       " 'her': 186,\n",
       " 'might': 187,\n",
       " 'welcome': 188,\n",
       " 'note': 189,\n",
       " 'however': 190,\n",
       " 'free': 191,\n",
       " 'place': 192,\n",
       " 'sure': 193,\n",
       " 'case': 194,\n",
       " 'never': 195,\n",
       " \"doesn't\": 196,\n",
       " 'done': 197,\n",
       " 'us': 198,\n",
       " 'vandalism': 199,\n",
       " 'reason': 200,\n",
       " 'put': 201,\n",
       " 'comment': 202,\n",
       " 'personal': 203,\n",
       " 'better': 204,\n",
       " \"that's\": 205,\n",
       " 'yourself': 206,\n",
       " 'using': 207,\n",
       " 'seems': 208,\n",
       " 'ask': 209,\n",
       " 'actually': 210,\n",
       " 'question': 211,\n",
       " 'off': 212,\n",
       " 'while': 213,\n",
       " 'feel': 214,\n",
       " 'anything': 215,\n",
       " 'believe': 216,\n",
       " 'links': 217,\n",
       " 'person': 218,\n",
       " 'things': 219,\n",
       " 'both': 220,\n",
       " 'she': 221,\n",
       " 'best': 222,\n",
       " 'comments': 223,\n",
       " 'policy': 224,\n",
       " 'part': 225,\n",
       " 'hope': 226,\n",
       " 'against': 227,\n",
       " \"can't\": 228,\n",
       " 'already': 229,\n",
       " 'keep': 230,\n",
       " 'thing': 231,\n",
       " '3': 232,\n",
       " 'u': 233,\n",
       " \"didn't\": 234,\n",
       " 'questions': 235,\n",
       " \"i'll\": 236,\n",
       " 'com': 237,\n",
       " 'nothing': 238,\n",
       " 'change': 239,\n",
       " 'wrong': 240,\n",
       " 'though': 241,\n",
       " 'subject': 242,\n",
       " 'problem': 243,\n",
       " 'remove': 244,\n",
       " 'little': 245,\n",
       " 'copyright': 246,\n",
       " 'tag': 247,\n",
       " '•': 248,\n",
       " 'trying': 249,\n",
       " 'long': 250,\n",
       " 'must': 251,\n",
       " 'understand': 252,\n",
       " 'above': 253,\n",
       " 'speedy': 254,\n",
       " 'anyone': 255,\n",
       " 'few': 256,\n",
       " 'world': 257,\n",
       " 'issue': 258,\n",
       " 'last': 259,\n",
       " 'others': 260,\n",
       " 'give': 261,\n",
       " 'editor': 262,\n",
       " 'sorry': 263,\n",
       " 'agree': 264,\n",
       " 'reliable': 265,\n",
       " 'rather': 266,\n",
       " 'let': 267,\n",
       " 'years': 268,\n",
       " 'fair': 269,\n",
       " 'english': 270,\n",
       " 'different': 271,\n",
       " 'making': 272,\n",
       " 'reference': 273,\n",
       " 'come': 274,\n",
       " 'style': 275,\n",
       " 'text': 276,\n",
       " 'references': 277,\n",
       " 'mean': 278,\n",
       " 'try': 279,\n",
       " 'non': 280,\n",
       " 'continue': 281,\n",
       " 'doing': 282,\n",
       " 'great': 283,\n",
       " 'found': 284,\n",
       " 'leave': 285,\n",
       " 'word': 286,\n",
       " 'says': 287,\n",
       " 'got': 288,\n",
       " 'state': 289,\n",
       " 'original': 290,\n",
       " \"isn't\": 291,\n",
       " 'probably': 292,\n",
       " 'site': 293,\n",
       " 'adding': 294,\n",
       " 'every': 295,\n",
       " 'check': 296,\n",
       " 'day': 297,\n",
       " 'simply': 298,\n",
       " 'created': 299,\n",
       " 'life': 300,\n",
       " 'top': 301,\n",
       " 'hello': 302,\n",
       " 'show': 303,\n",
       " 'post': 304,\n",
       " 'either': 305,\n",
       " 'consensus': 306,\n",
       " 'ip': 307,\n",
       " 'least': 308,\n",
       " 'delete': 309,\n",
       " 'else': 310,\n",
       " 'e': 311,\n",
       " 'yes': 312,\n",
       " 'view': 313,\n",
       " 'war': 314,\n",
       " 'far': 315,\n",
       " 'notable': 316,\n",
       " 'enough': 317,\n",
       " 'request': 318,\n",
       " 'etc': 319,\n",
       " 'example': 320,\n",
       " 'opinion': 321,\n",
       " 'contributions': 322,\n",
       " 'called': 323,\n",
       " 'around': 324,\n",
       " 'through': 325,\n",
       " 'www': 326,\n",
       " 'between': 327,\n",
       " 'real': 328,\n",
       " 'yet': 329,\n",
       " 'write': 330,\n",
       " 'reverted': 331,\n",
       " 'book': 332,\n",
       " 'shit': 333,\n",
       " 'down': 334,\n",
       " 'matter': 335,\n",
       " 'admin': 336,\n",
       " 're': 337,\n",
       " 'thought': 338,\n",
       " 'given': 339,\n",
       " 'images': 340,\n",
       " 'account': 341,\n",
       " 'material': 342,\n",
       " 'users': 343,\n",
       " 'bad': 344,\n",
       " 'encyclopedia': 345,\n",
       " 'having': 346,\n",
       " 'clearly': 347,\n",
       " 'title': 348,\n",
       " 'message': 349,\n",
       " 'support': 350,\n",
       " 'needs': 351,\n",
       " 'lot': 352,\n",
       " 'old': 353,\n",
       " 'evidence': 354,\n",
       " '—': 355,\n",
       " 'ever': 356,\n",
       " 'maybe': 357,\n",
       " 's': 358,\n",
       " 'tell': 359,\n",
       " 'revert': 360,\n",
       " 'seem': 361,\n",
       " 'language': 362,\n",
       " 'instead': 363,\n",
       " 'correct': 364,\n",
       " 'template': 365,\n",
       " 'org': 366,\n",
       " 'number': 367,\n",
       " 'clear': 368,\n",
       " 'media': 369,\n",
       " 'important': 370,\n",
       " 'saying': 371,\n",
       " 'pov': 372,\n",
       " '5': 373,\n",
       " '4': 374,\n",
       " 'always': 375,\n",
       " 'written': 376,\n",
       " 'true': 377,\n",
       " 'oh': 378,\n",
       " 'term': 379,\n",
       " 'further': 380,\n",
       " 'states': 381,\n",
       " 'hate': 382,\n",
       " 'quite': 383,\n",
       " 'perhaps': 384,\n",
       " 'review': 385,\n",
       " 'until': 386,\n",
       " 'bit': 387,\n",
       " 'whether': 388,\n",
       " \"i'd\": 389,\n",
       " 'research': 390,\n",
       " 'consider': 391,\n",
       " 'claim': 392,\n",
       " 'guidelines': 393,\n",
       " 'fucking': 394,\n",
       " 'version': 395,\n",
       " 'once': 396,\n",
       " 'based': 397,\n",
       " 'criteria': 398,\n",
       " 'times': 399,\n",
       " 'nigger': 400,\n",
       " 'website': 401,\n",
       " 'getting': 402,\n",
       " 'suck': 403,\n",
       " 'mention': 404,\n",
       " 'three': 405,\n",
       " 'several': 406,\n",
       " 'makes': 407,\n",
       " 'considered': 408,\n",
       " 'words': 409,\n",
       " 'c': 410,\n",
       " 'year': 411,\n",
       " 'hey': 412,\n",
       " 'changes': 413,\n",
       " 'idea': 414,\n",
       " \"there's\": 415,\n",
       " 'cannot': 416,\n",
       " 'ass': 417,\n",
       " 'address': 418,\n",
       " 'notice': 419,\n",
       " 'current': 420,\n",
       " 'group': 421,\n",
       " 'left': 422,\n",
       " 'following': 423,\n",
       " 'listed': 424,\n",
       " 'each': 425,\n",
       " 'date': 426,\n",
       " 'second': 427,\n",
       " 'means': 428,\n",
       " 'facts': 429,\n",
       " 'rules': 430,\n",
       " 'general': 431,\n",
       " 'possible': 432,\n",
       " 'main': 433,\n",
       " 'care': 434,\n",
       " 'regarding': 435,\n",
       " 'american': 436,\n",
       " 'man': 437,\n",
       " 'start': 438,\n",
       " '10': 439,\n",
       " 'topic': 440,\n",
       " 'mentioned': 441,\n",
       " 'course': 442,\n",
       " 'attack': 443,\n",
       " 'kind': 444,\n",
       " 'whole': 445,\n",
       " 'statement': 446,\n",
       " 'known': 447,\n",
       " 'end': 448,\n",
       " 'include': 449,\n",
       " 'issues': 450,\n",
       " 'seen': 451,\n",
       " 'create': 452,\n",
       " 'jpg': 453,\n",
       " 'dont': 454,\n",
       " 'en': 455,\n",
       " 'gay': 456,\n",
       " 'less': 457,\n",
       " 'related': 458,\n",
       " 'call': 459,\n",
       " 'ok': 460,\n",
       " 'sense': 461,\n",
       " 'big': 462,\n",
       " 'suggest': 463,\n",
       " 'happy': 464,\n",
       " 'category': 465,\n",
       " 'including': 466,\n",
       " 'notability': 467,\n",
       " 'info': 468,\n",
       " '2005': 469,\n",
       " 'provide': 470,\n",
       " 'redirect': 471,\n",
       " 'days': 472,\n",
       " 'move': 473,\n",
       " 'myself': 474,\n",
       " 'sentence': 475,\n",
       " \"wikipedia's\": 476,\n",
       " 'love': 477,\n",
       " 'four': 478,\n",
       " 'appropriate': 479,\n",
       " 'school': 480,\n",
       " 'news': 481,\n",
       " 'project': 482,\n",
       " 'changed': 483,\n",
       " 'explain': 484,\n",
       " 'started': 485,\n",
       " 'neutral': 486,\n",
       " 'line': 487,\n",
       " 'mind': 488,\n",
       " 'anyway': 489,\n",
       " 'contribs': 490,\n",
       " 'included': 491,\n",
       " 'removing': 492,\n",
       " 'next': 493,\n",
       " 't': 494,\n",
       " 'looking': 495,\n",
       " 'picture': 496,\n",
       " 'specific': 497,\n",
       " 'community': 498,\n",
       " 'although': 499,\n",
       " 'per': 500,\n",
       " 'order': 501,\n",
       " 'relevant': 502,\n",
       " 'sign': 503,\n",
       " 'die': 504,\n",
       " 'answer': 505,\n",
       " 'away': 506,\n",
       " 'interest': 507,\n",
       " 'full': 508,\n",
       " 'warning': 509,\n",
       " 'lol': 510,\n",
       " 'summary': 511,\n",
       " 'recent': 512,\n",
       " 'later': 513,\n",
       " 'file': 514,\n",
       " 'policies': 515,\n",
       " \"you've\": 516,\n",
       " 'faith': 517,\n",
       " 'claims': 518,\n",
       " 'discuss': 519,\n",
       " 'attacks': 520,\n",
       " 'public': 521,\n",
       " '0': 522,\n",
       " 'currently': 523,\n",
       " 'wrote': 524,\n",
       " 'writing': 525,\n",
       " 'especially': 526,\n",
       " 'interested': 527,\n",
       " 'able': 528,\n",
       " 'wish': 529,\n",
       " 'taken': 530,\n",
       " '6': 531,\n",
       " 'names': 532,\n",
       " 'position': 533,\n",
       " 'single': 534,\n",
       " 'within': 535,\n",
       " 'stuff': 536,\n",
       " 'below': 537,\n",
       " '2006': 538,\n",
       " 'during': 539,\n",
       " 'wanted': 540,\n",
       " 'web': 541,\n",
       " 'appears': 542,\n",
       " 'official': 543,\n",
       " '20': 544,\n",
       " 'live': 545,\n",
       " 'certainly': 546,\n",
       " 'nice': 547,\n",
       " 'color': 548,\n",
       " 'self': 549,\n",
       " 'itself': 550,\n",
       " 'country': 551,\n",
       " 'everyone': 552,\n",
       " 'report': 553,\n",
       " 'anti': 554,\n",
       " 'background': 555,\n",
       " 'lead': 556,\n",
       " 'high': 557,\n",
       " 'common': 558,\n",
       " 'god': 559,\n",
       " 'unless': 560,\n",
       " 'according': 561,\n",
       " 'completely': 562,\n",
       " 'hard': 563,\n",
       " 'books': 564,\n",
       " 'pretty': 565,\n",
       " '7': 566,\n",
       " 'everything': 567,\n",
       " 'p': 568,\n",
       " 'published': 569,\n",
       " 'due': 570,\n",
       " '24': 571,\n",
       " 'process': 572,\n",
       " 'edited': 573,\n",
       " 'looks': 574,\n",
       " 'involved': 575,\n",
       " 'fat': 576,\n",
       " 'therefore': 577,\n",
       " \"won't\": 578,\n",
       " 'remember': 579,\n",
       " 'obviously': 580,\n",
       " 'power': 581,\n",
       " 'd': 582,\n",
       " 'future': 583,\n",
       " 'nor': 584,\n",
       " '100': 585,\n",
       " 'truth': 586,\n",
       " 'came': 587,\n",
       " 'sandbox': 588,\n",
       " '11': 589,\n",
       " 'response': 590,\n",
       " 'party': 591,\n",
       " 'reading': 592,\n",
       " 'stay': 593,\n",
       " 'past': 594,\n",
       " 'game': 595,\n",
       " 'learn': 596,\n",
       " 'admins': 597,\n",
       " 'quote': 598,\n",
       " 'asked': 599,\n",
       " \"wasn't\": 600,\n",
       " 'b': 601,\n",
       " 'city': 602,\n",
       " 'entry': 603,\n",
       " 'stupid': 604,\n",
       " \"he's\": 605,\n",
       " 'posted': 606,\n",
       " 'false': 607,\n",
       " 'faggot': 608,\n",
       " 'whatever': 609,\n",
       " 'google': 610,\n",
       " 'talking': 611,\n",
       " 'ago': 612,\n",
       " '8': 613,\n",
       " 'placed': 614,\n",
       " 'political': 615,\n",
       " 'similar': 616,\n",
       " 'today': 617,\n",
       " 'system': 618,\n",
       " 'administrator': 619,\n",
       " 'united': 620,\n",
       " 'argument': 621,\n",
       " 'paragraph': 622,\n",
       " 'working': 623,\n",
       " 'exactly': 624,\n",
       " '2007': 625,\n",
       " 'guy': 626,\n",
       " '12': 627,\n",
       " 'british': 628,\n",
       " 'took': 629,\n",
       " 'useful': 630,\n",
       " 'government': 631,\n",
       " 'search': 632,\n",
       " 'noticed': 633,\n",
       " 'moron': 634,\n",
       " 'regards': 635,\n",
       " 'small': 636,\n",
       " 'reasons': 637,\n",
       " 'side': 638,\n",
       " '2008': 639,\n",
       " 'form': 640,\n",
       " 'national': 641,\n",
       " 'dispute': 642,\n",
       " 'deleting': 643,\n",
       " 'five': 644,\n",
       " 'guess': 645,\n",
       " 'appreciate': 646,\n",
       " 'particular': 647,\n",
       " 'reverting': 648,\n",
       " 'major': 649,\n",
       " 'problems': 650,\n",
       " 'law': 651,\n",
       " '000': 652,\n",
       " '15': 653,\n",
       " 'npov': 654,\n",
       " 'bitch': 655,\n",
       " 'rule': 656,\n",
       " 'banned': 657,\n",
       " 'often': 658,\n",
       " 'provided': 659,\n",
       " 'music': 660,\n",
       " 'become': 661,\n",
       " 'wikiproject': 662,\n",
       " 'needed': 663,\n",
       " 'status': 664,\n",
       " 'reply': 665,\n",
       " 'knowledge': 666,\n",
       " 'tried': 667,\n",
       " 'along': 668,\n",
       " 'almost': 669,\n",
       " 'cheers': 670,\n",
       " 'stated': 671,\n",
       " 'username': 672,\n",
       " 'film': 673,\n",
       " '9': 674,\n",
       " 'taking': 675,\n",
       " 'fine': 676,\n",
       " '–': 677,\n",
       " 'company': 678,\n",
       " 'vandalize': 679,\n",
       " 'present': 680,\n",
       " 'certain': 681,\n",
       " 'white': 682,\n",
       " 'follow': 683,\n",
       " 'sort': 684,\n",
       " 'otherwise': 685,\n",
       " 'terms': 686,\n",
       " 'points': 687,\n",
       " 'explanation': 688,\n",
       " 'uploaded': 689,\n",
       " \"haven't\": 690,\n",
       " 'description': 691,\n",
       " 'generally': 692,\n",
       " 'recently': 693,\n",
       " 'entire': 694,\n",
       " 'open': 695,\n",
       " 'story': 696,\n",
       " 'tags': 697,\n",
       " 'shows': 698,\n",
       " 'alone': 699,\n",
       " 'ban': 700,\n",
       " 'citation': 701,\n",
       " 'short': 702,\n",
       " 'definition': 703,\n",
       " '14': 704,\n",
       " 'cited': 705,\n",
       " 'likely': 706,\n",
       " 'aware': 707,\n",
       " 'g': 708,\n",
       " 'saw': 709,\n",
       " 'class': 710,\n",
       " 'type': 711,\n",
       " 'soon': 712,\n",
       " 'set': 713,\n",
       " 'week': 714,\n",
       " 'indeed': 715,\n",
       " 'band': 716,\n",
       " 'cite': 717,\n",
       " 'decide': 718,\n",
       " 'mr': 719,\n",
       " 'views': 720,\n",
       " '2004': 721,\n",
       " 'appear': 722,\n",
       " 'family': 723,\n",
       " 'simple': 724,\n",
       " 'area': 725,\n",
       " 'guys': 726,\n",
       " 'theory': 727,\n",
       " 'piece': 728,\n",
       " 'contributing': 729,\n",
       " 'contact': 730,\n",
       " 'external': 731,\n",
       " 'result': 732,\n",
       " 'test': 733,\n",
       " 'internet': 734,\n",
       " 'interesting': 735,\n",
       " 'unblock': 736,\n",
       " 'actual': 737,\n",
       " 'improve': 738,\n",
       " 'copy': 739,\n",
       " '16': 740,\n",
       " 'sourced': 741,\n",
       " 'jew': 742,\n",
       " 'told': 743,\n",
       " 'attention': 744,\n",
       " 'proposed': 745,\n",
       " 'obvious': 746,\n",
       " 'moved': 747,\n",
       " 'email': 748,\n",
       " 'uk': 749,\n",
       " 'members': 750,\n",
       " 'various': 751,\n",
       " 'allowed': 752,\n",
       " 'themselves': 753,\n",
       " 'conflict': 754,\n",
       " 'context': 755,\n",
       " \"article's\": 756,\n",
       " 'black': 757,\n",
       " 'university': 758,\n",
       " 'author': 759,\n",
       " 'thus': 760,\n",
       " 'disagree': 761,\n",
       " 'cunt': 762,\n",
       " 'john': 763,\n",
       " 'went': 764,\n",
       " 'citations': 765,\n",
       " 'sites': 766,\n",
       " 'jews': 767,\n",
       " 'actions': 768,\n",
       " 'hand': 769,\n",
       " 'bias': 770,\n",
       " 'previous': 771,\n",
       " 'third': 772,\n",
       " 'hours': 773,\n",
       " 'human': 774,\n",
       " '18': 775,\n",
       " 'works': 776,\n",
       " 'nonsense': 777,\n",
       " 'science': 778,\n",
       " 'ones': 779,\n",
       " 'death': 780,\n",
       " 'action': 781,\n",
       " '17': 782,\n",
       " 'enjoy': 783,\n",
       " \"aren't\": 784,\n",
       " 'job': 785,\n",
       " 'proper': 786,\n",
       " 'longer': 787,\n",
       " 'large': 788,\n",
       " 'together': 789,\n",
       " 'sucks': 790,\n",
       " '\\xa0': 791,\n",
       " '13': 792,\n",
       " 'addition': 793,\n",
       " \"wouldn't\": 794,\n",
       " 'avoid': 795,\n",
       " 'creating': 796,\n",
       " 'happened': 797,\n",
       " '19': 798,\n",
       " 'valid': 799,\n",
       " 'jewish': 800,\n",
       " 'german': 801,\n",
       " 'deal': 802,\n",
       " '21': 803,\n",
       " 'automatically': 804,\n",
       " 'biased': 805,\n",
       " 'proof': 806,\n",
       " 'worked': 807,\n",
       " 'im': 808,\n",
       " 'series': 809,\n",
       " 'dick': 810,\n",
       " 'goes': 811,\n",
       " 'himself': 812,\n",
       " 'seriously': 813,\n",
       " \"what's\": 814,\n",
       " '23': 815,\n",
       " 'level': 816,\n",
       " 'standard': 817,\n",
       " 'f': 818,\n",
       " '2009': 819,\n",
       " 'accepted': 820,\n",
       " 'respect': 821,\n",
       " 'exist': 822,\n",
       " 'available': 823,\n",
       " 'de': 824,\n",
       " 'helpful': 825,\n",
       " 'video': 826,\n",
       " 'comes': 827,\n",
       " '22': 828,\n",
       " 'meaning': 829,\n",
       " \"shouldn't\": 830,\n",
       " 'manual': 831,\n",
       " 'living': 832,\n",
       " 'opinions': 833,\n",
       " 'sex': 834,\n",
       " 'rights': 835,\n",
       " 'act': 836,\n",
       " 'tildes': 837,\n",
       " 'criticism': 838,\n",
       " 'play': 839,\n",
       " '2010': 840,\n",
       " 'necessary': 841,\n",
       " 'calling': 842,\n",
       " 'accept': 843,\n",
       " 'sections': 844,\n",
       " 'indicate': 845,\n",
       " 'personally': 846,\n",
       " 'yeah': 847,\n",
       " '30': 848,\n",
       " 'july': 849,\n",
       " 'hell': 850,\n",
       " 'accurate': 851,\n",
       " 'violation': 852,\n",
       " 'statements': 853,\n",
       " 'pig': 854,\n",
       " 'attempt': 855,\n",
       " 'months': 856,\n",
       " 'assume': 857,\n",
       " 'afd': 858,\n",
       " 'upon': 859,\n",
       " 'historical': 860,\n",
       " 'usually': 861,\n",
       " 'debate': 862,\n",
       " \"let's\": 863,\n",
       " 'pro': 864,\n",
       " 'details': 865,\n",
       " 'multiple': 866,\n",
       " 'blocking': 867,\n",
       " 'south': 868,\n",
       " 'rest': 869,\n",
       " 'tagged': 870,\n",
       " 'width': 871,\n",
       " 'serious': 872,\n",
       " 'doubt': 873,\n",
       " 'record': 874,\n",
       " 'greek': 875,\n",
       " 'm': 876,\n",
       " 'r': 877,\n",
       " \"they're\": 878,\n",
       " 'separate': 879,\n",
       " 'v': 880,\n",
       " 'space': 881,\n",
       " 'situation': 882,\n",
       " 'cause': 883,\n",
       " \"you'll\": 884,\n",
       " 'speak': 885,\n",
       " 'heard': 886,\n",
       " 'explaining': 887,\n",
       " 'okay': 888,\n",
       " 'refer': 889,\n",
       " 'fix': 890,\n",
       " 'run': 891,\n",
       " 'quality': 892,\n",
       " 'data': 893,\n",
       " 'complete': 894,\n",
       " 'penis': 895,\n",
       " 'sock': 896,\n",
       " 'church': 897,\n",
       " 'w': 898,\n",
       " 'messages': 899,\n",
       " 'none': 900,\n",
       " 'india': 901,\n",
       " 'asking': 902,\n",
       " 'august': 903,\n",
       " 'online': 904,\n",
       " 'lack': 905,\n",
       " 'legal': 906,\n",
       " 'period': 907,\n",
       " 'freedom': 908,\n",
       " 'team': 909,\n",
       " 'military': 910,\n",
       " 'rationale': 911,\n",
       " 'behavior': 912,\n",
       " 'prove': 913,\n",
       " 'apparently': 914,\n",
       " 'access': 915,\n",
       " 'special': 916,\n",
       " 'close': 917,\n",
       " 'changing': 918,\n",
       " 'bullshit': 919,\n",
       " 'directly': 920,\n",
       " 'watch': 921,\n",
       " 'culture': 922,\n",
       " 'difference': 923,\n",
       " 'march': 924,\n",
       " 'early': 925,\n",
       " 'box': 926,\n",
       " 'contribute': 927,\n",
       " 'wikipedian': 928,\n",
       " 'existing': 929,\n",
       " 'huge': 930,\n",
       " 'gets': 931,\n",
       " 'html': 932,\n",
       " 'couple': 933,\n",
       " '25': 934,\n",
       " 'among': 935,\n",
       " 'civil': 936,\n",
       " 'warring': 937,\n",
       " 'supposed': 938,\n",
       " 'primary': 939,\n",
       " 'except': 940,\n",
       " 'head': 941,\n",
       " 'countries': 942,\n",
       " 'born': 943,\n",
       " 'meant': 944,\n",
       " 'modern': 945,\n",
       " '50': 946,\n",
       " 'photo': 947,\n",
       " 'described': 948,\n",
       " 'incorrect': 949,\n",
       " 'fish': 950,\n",
       " 'uses': 951,\n",
       " 'disruptive': 952,\n",
       " 'significant': 953,\n",
       " 'field': 954,\n",
       " 'specifically': 955,\n",
       " 'red': 956,\n",
       " 'purpose': 957,\n",
       " 'pillars': 958,\n",
       " 'friend': 959,\n",
       " 'release': 960,\n",
       " 'archive': 961,\n",
       " 'million': 962,\n",
       " 'produce': 963,\n",
       " 'tv': 964,\n",
       " 'error': 965,\n",
       " 'force': 966,\n",
       " 'table': 967,\n",
       " 'earlier': 968,\n",
       " 'business': 969,\n",
       " 'computer': 970,\n",
       " 'june': 971,\n",
       " 'sometimes': 972,\n",
       " 'half': 973,\n",
       " 'cases': 974,\n",
       " 'outside': 975,\n",
       " 'vote': 976,\n",
       " 'x': 977,\n",
       " 'inclusion': 978,\n",
       " 'particularly': 979,\n",
       " 'character': 980,\n",
       " 'pictures': 981,\n",
       " 'gave': 982,\n",
       " 'linked': 983,\n",
       " 'abuse': 984,\n",
       " '27': 985,\n",
       " 'control': 986,\n",
       " 'possibly': 987,\n",
       " 'numbers': 988,\n",
       " 'home': 989,\n",
       " 'anonymous': 990,\n",
       " 'member': 991,\n",
       " 'january': 992,\n",
       " 'christian': 993,\n",
       " 'scientific': 994,\n",
       " 'arguments': 995,\n",
       " 'tutorial': 996,\n",
       " '2012': 997,\n",
       " 'n': 998,\n",
       " 'reported': 999,\n",
       " 'border': 1000,\n",
       " ...}"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "word_index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "评论单词210337，没有词向量1401\n"
     ]
    }
   ],
   "source": [
    "embedding_matrix = np.random.normal(emb_mean, emb_std, (nb_words, embed_size))\n",
    "words_without_wv = 0\n",
    "for word, i in word_index.items():\n",
    "    if i >= max_features: \n",
    "        continue\n",
    "    embedding_vector = embedding_index.get(word)\n",
    "    if embedding_vector is not None:\n",
    "        embedding_matrix[i] = embedding_vector\n",
    "    else:\n",
    "        words_without_wv += 1\n",
    "print(\"评论单词{}，没有词向量{:2.%}\".format(len(word_index), words_without_wv))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "词汇表中0.67%单词没有词向量\n"
     ]
    }
   ],
   "source": [
    "print(\"词汇表中{:.2%}单词没有词向量\".format(words_without_wv/len(word_index)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.02822436,  0.16354269,  0.36796174, ..., -0.23140568,\n",
       "        -0.75071671, -0.2013963 ],\n",
       "       [ 0.41800001,  0.24968   , -0.41242   , ..., -0.18411   ,\n",
       "        -0.11514   , -0.78580999],\n",
       "       [ 0.68046999, -0.039263  ,  0.30186   , ..., -0.073297  ,\n",
       "        -0.064699  , -0.26043999],\n",
       "       ...,\n",
       "       [-0.13491   , -0.8635    , -0.033898  , ...,  0.29484999,\n",
       "        -0.24315999,  0.81682003],\n",
       "       [ 0.1178    ,  0.14624   , -0.28240001, ..., -0.19529   ,\n",
       "        -0.13610999,  1.04270005],\n",
       "       [-0.64388001, -0.54152   ,  0.10305   , ..., -0.06732   ,\n",
       "        -0.73308003,  0.88625002]])"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "embedding_matrix"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.训练模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From D:\\Program\\Anaconda3\\envs\\nlp\\lib\\site-packages\\tensorflow\\python\\framework\\op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Colocations handled automatically by placer.\n",
      "WARNING:tensorflow:From D:\\Program\\Anaconda3\\envs\\nlp\\lib\\site-packages\\keras\\backend\\tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.\n"
     ]
    }
   ],
   "source": [
    "# Bidirectional LSTM over the embedding matrix built above, max-pooled over\n",
    "# the time axis and followed by a small dense head with six sigmoid outputs.\n",
    "inp = Input(shape=(maxlen, ))\n",
    "embedded = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)\n",
    "\n",
    "# return_sequences=True keeps one output per timestep so pooling can pick from all of them.\n",
    "recurrent = LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.1)\n",
    "encoded = Bidirectional(recurrent)(embedded)\n",
    "pooled = GlobalMaxPool1D()(encoded)\n",
    "\n",
    "hidden = Dense(50, activation='relu')(pooled)\n",
    "hidden = Dropout(0.1)(hidden)\n",
    "probs = Dense(6, activation='sigmoid')(hidden)\n",
    "\n",
    "model = Model(inputs=inp, outputs=probs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.compile(loss='binary_crossentropy', optimizer='adam',\n",
    "              metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From D:\\Program\\Anaconda3\\envs\\nlp\\lib\\site-packages\\tensorflow\\python\\ops\\math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use tf.cast instead.\n",
      "WARNING:tensorflow:From D:\\Program\\Anaconda3\\envs\\nlp\\lib\\site-packages\\tensorflow\\python\\ops\\math_grad.py:102: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Deprecated in favor of operator or tf.math.divide.\n",
      "Train on 143613 samples, validate on 15958 samples\n",
      "Epoch 1/2\n",
      "143613/143613 [==============================] - 712s 5ms/step - loss: 0.0596 - acc: 0.9793 - val_loss: 0.0484 - val_acc: 0.9822\n",
      "Epoch 2/2\n",
      "143613/143613 [==============================] - 685s 5ms/step - loss: 0.0444 - acc: 0.9832 - val_loss: 0.0462 - val_acc: 0.9832\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x198a452d0c8>"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "history = model.fit(X_train, y_train, batch_size=32, epochs=2, validation_split=0.1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "153164/153164 [==============================] - 42s 275us/step\n"
     ]
    }
   ],
   "source": [
    "y_test_pred = model.predict([X_test], batch_size=1024, verbose=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.99395025, 0.22166064, 0.9438987 , 0.01606953, 0.8902844 ,\n",
       "       0.04045072], dtype=float32)"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test_pred[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"Yo bitch Ja Rule is more succesful then you'll ever be whats up with you and hating you sad mofuckas...i should bitch slap ur pethedic white faces and get you to kiss my ass you guys sicken me. Ja rule is about pride in da music man. dont diss that shit on him. and nothin is wrong bein like tupac he was a brother too...fuckin white boys get things right next time.,\""
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.iloc[0]['comment_text']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.TextCNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.layers import Conv2D, MaxPooling2D, Concatenate, Reshape, Flatten"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TextCNN: parallel convolutions of several heights over the embedded sequence,\n",
    "# each max-pooled over all positions, then concatenated and classified.\n",
    "filters = 32\n",
    "kernel_sizes = np.arange(1, 6)\n",
    "\n",
    "inp = Input(shape=(maxlen, ))\n",
    "embedded = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)\n",
    "# Add a trailing channel axis so the embedded sequence can be fed to Conv2D.\n",
    "embedded = Reshape((maxlen, embed_size, 1))(embedded)\n",
    "embedded = Dropout(0.2)(embedded)\n",
    "\n",
    "branches = []\n",
    "for height in kernel_sizes:\n",
    "    # Each kernel spans the full embedding width, so it only slides along the sequence.\n",
    "    conv = Conv2D(filters,\n",
    "                  kernel_size=(height, embed_size),\n",
    "                  strides=1,\n",
    "                  padding='valid',\n",
    "                  activation='relu')(embedded)\n",
    "    # 'valid' padding leaves maxlen - height + 1 positions; pool over all of them.\n",
    "    pooled = MaxPooling2D(pool_size=(maxlen - height + 1, 1))(conv)\n",
    "    branches.append(pooled)\n",
    "\n",
    "merged = Concatenate(axis=1)(branches)\n",
    "flat = Flatten()(merged)\n",
    "flat = Dropout(0.1)(flat)\n",
    "hidden = Dense(50, activation='relu')(flat)\n",
    "hidden = Dropout(0.1)(hidden)\n",
    "probs = Dense(6, activation='sigmoid')(hidden)\n",
    "\n",
    "model = Model(inputs=inp, outputs=probs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.compile(loss='binary_crossentropy', optimizer='adam',\n",
    "              metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 143613 samples, validate on 15958 samples\n",
      "Epoch 1/4\n",
      "143613/143613 [==============================] - 267s 2ms/step - loss: 0.0614 - acc: 0.9786 - val_loss: 0.0549 - val_acc: 0.9800\n",
      "Epoch 2/4\n",
      "143613/143613 [==============================] - 247s 2ms/step - loss: 0.0487 - acc: 0.9817 - val_loss: 0.0497 - val_acc: 0.9818\n",
      "Epoch 3/4\n",
      "143613/143613 [==============================] - 252s 2ms/step - loss: 0.0441 - acc: 0.9829 - val_loss: 0.0476 - val_acc: 0.9824\n",
      "Epoch 4/4\n",
      "143613/143613 [==============================] - 255s 2ms/step - loss: 0.0413 - acc: 0.9838 - val_loss: 0.0481 - val_acc: 0.9828\n"
     ]
    }
   ],
   "source": [
    "history = model.fit(X_train, y_train, batch_size=32, epochs=4, validation_split=0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6.BiGRU+Pool"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.layers import SpatialDropout1D, GlobalAveragePooling1D, GlobalMaxPooling1D, GRU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bidirectional GRU whose per-timestep outputs are summarised by both average\n",
    "# and max pooling before the final six-unit sigmoid layer.\n",
    "inp = Input(shape=(maxlen, ))\n",
    "x = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)\n",
    "x = SpatialDropout1D(0.2)(x)\n",
    "x = Bidirectional(GRU(80, return_sequences=True))(x)\n",
    "avg_pool = GlobalAveragePooling1D()(x)\n",
    "max_pool = GlobalMaxPooling1D()(x)\n",
    "# The lowercase `concatenate` helper is not among this notebook's visible keras\n",
    "# imports (NameError on a fresh kernel); use the Concatenate layer imported in\n",
    "# the TextCNN section instead.\n",
    "conc = Concatenate()([avg_pool, max_pool])\n",
    "outp = Dense(6, activation=\"sigmoid\")(conc)\n",
    "\n",
    "model = Model(inputs=inp, outputs=outp)\n",
    "model.compile(loss='binary_crossentropy',\n",
    "              optimizer='adam',\n",
    "              metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 143613 samples, validate on 15958 samples\n",
      "Epoch 1/2\n",
      "143613/143613 [==============================] - 766s 5ms/step - loss: 0.0587 - acc: 0.9795 - val_loss: 0.0486 - val_acc: 0.9820\n",
      "Epoch 2/2\n",
      "143613/143613 [==============================] - 652s 5ms/step - loss: 0.0446 - acc: 0.9831 - val_loss: 0.0458 - val_acc: 0.9830\n"
     ]
    }
   ],
   "source": [
    "history = model.fit(X_train, y_train, batch_size=32, epochs=2, validation_split=0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": true
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
